# 8192  theFilter.tcl
###########################################################################
# Copyright (c) 1998, Jeffrey Glen Rennie
# All rights reserved.
###########################################################################

######################################################################
# resolveRelativeURL takes the url of the document begin parsed, 
# and a reference (possible relative to the current path) and 
# returns a url without the http and without the port number
proc resolveRelativeURL { url rel } {
    #remove the http from the url
    regsub -nocase ^http:// $url {} url
    #remove any put args from the rel 
    regsub {\?.*$} $rel {} rel
    #if this is a relative..
    if [regexp {^(\.\./)+} $rel relativePart] {
	#substitute the ../../ with the real path
	regsub {^(\.\./)+} $rel {} absolutePart
	regsub -all {\.\./} $relativePart {[^/]+/} extractor
	append extractor {([^/]?)$}
	regsub {/[^/]*$} $url {/} url
	regsub $extractor $url $absolutePart rel
    } elseif [regsub -nocase ^http:// $rel {} rel] {
	#It's an absolute href so get rid of the http and the port number
	regsub {:[0-9]+/} $rel / rel	    
    } elseif [regexp ^/ $rel] {
	#it's an href relative to the root
	regexp -nocase {^(http://)?([^/]*)} $url match proto host
	set rel $host$rel
    } else {
	#it's relative to the current path
	regsub {[^/]*$} $url {} currentPath
	set rel $currentPath$rel
    }
    set rel
}	

proc defaultFilter { tagname slash options args} {
    global token
    global url

    if [llength $args] {
	set text [lindex $args 0]
    } else {
	set text {}
    }
    set result [rebuildTag $tagname $slash $options $text]

    set tagname [string trim [string toupper $tagname]]
    if { $tagname != "" } {

	set w " \n\r\t"

	global $token
	upvar 0 $token state
	global parseResult
	if { $tagname == "A" && $slash == "" } {
	    if { [regexp -nocase \
		    "href\[$w]*=\[$w]*(\[^>$w]*)" \
		    $options match dest] } {
		if { [ConflictUrl $url $dest] || [isKnownAdURL $dest] } {
		    set state(noimages) 1
		}
	    }
	} elseif {$tagname == "A" && $slash != "" } {
	    catch { unset state(noimages) }
	}
	if { $tagname == "IMG" } {
	    if [info exists state(noimages)] {
		set result $text
	    } elseif { [regexp -nocase "\[$w]src=(\[\"']?)(\[^$w>\"']*)(\[\"']?)" \
		    $options match q imgSrc q] && [isKnownAdURL $imgSrc] } {
		set result $text
	    }		
	}
    }
    append parseResult $result
}

proc rebuildTag { tagname slash options text } {
    if { [string trim $options] == "" } {
	set result "<$slash$tagname>$text"
    } else {
	set result "<$slash$tagname $options>$text"
    }
}

proc isSurfcheckFriend { dest} {
    regexp -nocase surfcams $dest
}

proc friendFilter { procIsFriend tagname slash options args} {
    global token
    global url
    global parseResult

    if [llength $args] {
	set text [lindex $args 0]
    } else {
	set text {}
    }
    set tagname [string trim [string toupper $tagname]]

    if { $tagname != "" } {

	set result [rebuildTag $tagname $slash $options $text]
	set w " \n\r\t"

	global $token
	upvar 0 $token state
	if { $tagname == "A" && $slash == "" } {
	    if { [regexp -nocase \
		    "href\[$w]*=\[$w]*(\[^$w>]*)" \
		    $options match dest] && [ConflictUrl $url $dest] && \
		    ![eval $procIsFriend [list $dest]] } {
		set state(noimages) 1
	    }
	} elseif {$tagname == "A" && $slash != "" } {
	    catch { unset state(noimages) }
	}
	if { $tagname == "IMG" } {
	    if [info exists state(noimages)] {
		set result $text
	    } elseif { [regexp -nocase \
		    "\[$w]src=(\[\"']?)(\[^$w>\"']*)(\[\"']?)" \
		    $options match q imgSrc q] && [isKnownAdURL $imgSrc] } {
		set result $text
	    }		
	}
	append parseResult $result
    }
}

proc defaultFilterFinish { token } {
    global $token
    upvar 0 $token state
    catch { unset state(noimages) }
    set result ""
}

proc friendFilterFinish { token } {
    global $token
    upvar 0 $token state
    catch { unset state(noimages) }
    set result ""
}

proc hexDigit { num } {
    format %c 0x$num
}

proc subHexDigits { dest } {
    #substitute hex coding for their char value
    set hexDigit {[a-fA-F0-9]}
    regsub -all -nocase "%($hexDigit$hexDigit)" $dest {[hexDigit \1]} dest
    eval set result $dest
}

proc ConflictUrl { urlSource urlDest } {
    # first, get the source host -- easy
    regexp -nocase {^(http://)?([^:/]*)} $urlSource match proto hostSource
    # first trim quotes
    set urlDest [string trim $urlDest \"]
    # convert hex digits
    catch {set urlDest [subHexDigits $urlDest] }
    set result 0

    set httpRoot "http://"
    if { [regsub -nocase -all $httpRoot $urlDest \x81 dest] && \
	    [regexp "\x81(\[^\x81]*)\$" $dest x urlPath] && \
	    [regexp {^[^&:/]*} $urlPath hostDest] } {	

	set result 1
	# remove the trailing .com or whatever
	regsub {\.[^\.]*$} $hostSource {} hostSource
	regsub {\.[^\.]*$} $hostDest {} hostDest
	# split into pieces
	set hostListSource [split [string tolower $hostSource] .]
	# set hostListDest [split [string tolower $hostDest] .]
	# now compare pieces
	foreach piece $hostListSource {
	    if { ![regexp -nocase www $piece] && \
		    [regexp -nocase $piece $hostDest] } {
		set result 0
	    }
	}
    }
    set result
}

proc defaultBlocker { tagname slash options args} {
    global token
    global url
    set tagname [string trim [string toupper $tagname]]
    if { $tagname != "" } {
	set w " \n\r\t"

	if [llength $args] {
	    set text [lindex $args 0]
	} else {
	    set text {}
	}

	global $token
	upvar 0 $token state
	global parseResult
	if { $tagname == "A" && $slash == "" } {
	    if { [regexp -nocase \
		    "href\[$w]*=\[$w]*(\[^>$w]*)" \
		    $options match dest] } {
		if { [ConflictUrl $url $dest] || [isKnownAdURL $dest] } {
		    set state(noimages) 1
		}
	    }
	} elseif {$tagname == "A" && $slash != "" } {
	    catch { unset state(noimages) }
	}
	if { $tagname == "IMG" && [regexp -nocase \
		"\[$w]src=(\[\"']?)(\[^$w>\"']*)(\[\"']?)" \
		$options match q imgSrc q] } {
	    if { [info exists state(noimages)] || [isKnownAdURL $imgSrc] } {
		lappend parseResult [resolveRelativeURL $url $imgSrc]
	    }
	}
    }
}

proc isFriend { urlTarget friends } {
    set result 0
    foreach friend $friends {
	if [regexp $friend $urlTarget] {
	    set result 1
	}
    }
}

proc friendBlocker { procIsFriend tagname slash options args} {
    global token
    global url
    global parseResult
    set tagname [string trim [string toupper $tagname]]
    if { $tagname != "" } {
	set w " \n\r\t"

	if [llength $args] {
	    set text [lindex $args 0]
	} else {
	    set text {}
	}

	global $token
	upvar 0 $token state
	if { $tagname == "A" && $slash == "" } {
	    if { [regexp -nocase \
		    "href\[$w]*=\[$w]*(\[^>$w]*)" \
		    $options match dest] && [ConflictUrl $url $dest] && \
		    ![eval $procIsFriend [list $dest]] } {
		set state(noimages) 1
	    }
	} elseif {$tagname == "A" && $slash != "" } {
	    catch { unset state(noimages) }
	}
	if { $tagname == "IMG" && [regexp -nocase \
		"\[$w]src=(\[\"']?)(\[^$w>\"']*)(\[\"']?)" \
		$options match q imgSrc q] } {
	    if { [info exists state(noimages)] || [isKnownAdURL $imgSrc] } {
		lappend parseResult [resolveRelativeURL $url $imgSrc]
	    }
	}
    }
}

proc defaultBlockerFinish { token } {
    global $token
    upvar 0 $token state
    catch { unset state(noimages) }
    set result ""
}    

proc friendBlockerFinish { token } {
    global $token
    upvar 0 $token state
    catch { unset state(noimages) }
    set result ""
}    

proc noComments { tagname slash options args } {
    global parseResult
    if { $tagname == "!--" || $tagname == "SCRIPT"} {
	    eval append parseResult $args
    } else {
	eval defaultFilter [list $tagname $slash $options] $args
    }
}

set knownAdURLs [list \
	.*ads.lycos.com* \
	.*us.yimg.com/promotions.* \
	.*imageserv.imgis.com/images/ad.* \
	.*ad.doubleclick.net.* \
	.*adforce.imgis.com.* \
	.*focalink.com.* \
	.*/ad(s?)/.* \
	.*adclick.exe.* \
	.*adserver.exe.* \
	.*randomizer=.* \
	.*ad.preferences.com.* \
	.*click.ng.* \
	.*event.ng.* \
	]

# concat all the expressions into one big regexp for faster evaluation
set regexpKnownAdURLs [join $knownAdURLs ")|("]
set regexpKnownAdURLs "($regexpKnownAdURLs)"

proc isKnownAdURL { url } {
    global regexpKnownAdURLs
    regexp -nocase $regexpKnownAdURLs $url
}

# the filter table:
set theFilterTable [list \
    {}                  defaultFilter \
    www.goto.com        noComments \
    www.surfcheck.com   [list friendFilter isSurfcheckFriend] \
]


set theBlockerTable [list \
    {}                  defaultBlocker \
    www.surfcheck.com   [list friendBlocker isSurfcheckFriend] \
]



###########################################################################
# Copyright (c) 1998, Jeffrey Glen Rennie
# All rights reserved.
###########################################################################
