#!/usr/local/bin/perl
#
# Access Analysis: CGI report over the tail of a web server access log.
#
# Reads the last N lines of the log (N comes from the query string, default
# 250), groups the hits by requesting host, and prints one HTML table per
# host.  Referers that look like search-engine result pages are reduced to
# "<engine>: <search words>".
#
# NOTE(review): the HTML tags in this file appear to have been stripped at
# some point (empty heredocs, `$from = "$from"`, etc.).  The markup emitted
# below is a minimal reconstruction -- confirm it against the intended layout.

use strict;
use warnings;

# NOTE(review): in the collapsed source this statement sat behind a '#', so
# it is unclear whether it was live.  A CGI script that emits HTML needs it.
print "Content-type: text/html\n\n";

# Number of trailing log lines to analyse.  The value is handed to tail(1),
# so only a plain positive integer is accepted from the request; anything
# else falls back to the default.
our $tail = 250;
if (   defined $ENV{'QUERY_STRING'}
    && $ENV{'QUERY_STRING'} =~ /^(\d{1,7})$/
    && $1 > 0)
{
    $tail = $1 + 0;
}

# Location of the log file in Extended Log Format
# You must edit this line
our $log = "/home/yourusername/yourdomainname-logs/access-log";

# Local domain name to cut out of referer info
# You must edit this line - and that's it!
our $local = "http://yourdomainname.com";

# -------------------------

our %MONTHS = (
    'Jan', '01', 'Feb', '02', 'Mar', '03', 'Apr', '04',
    'May', '05', 'Jun', '06', 'Jul', '07', 'Aug', '08',
    'Sep', '09', 'Oct', '10', 'Nov', '11', 'Dec', '12',
);

# State filled in by ReadLog() and consumed by the report below.
our $lineerrors = 0;     # number of log lines that could not be parsed
our @badlines   = ();    # the unparsable lines themselves
our $open_error = '';    # non-empty when the log could not be read
our %first      = ();    # host => sortable stamp of its first hit seen
our %agent      = ();    # host => user-agent string of its latest hit
our %sessions   = ();    # host => [ { date, page, status, from }, ... ]

# Scratch state shared between the report loop and search_words().
our $from  = '';         # referer being examined / HTML to display for it
our $found = 0;          # true once a search engine matched $from

# One entry of the log: host ident user [time] "request" status bytes
# "referer" "agent".
my $LOG_LINE_RE = qr{
    ^ (\S+) \s+ (\S+) \s+ (\S+) \s+
    \[ ([^\]]+) \] \s*
    " ([^"]+) " \s*
    (\S+) \s+ (\S+) \s*
    " \s* ([^"]*) \s* " \s*
    " \s* ([^"]*) \s* " \s* \z
}x;

# Search engines to recognise in referers: [ display name, text that must
# occur in the referer, name of the query-string parameter holding the search
# words ].  Order matters: the first entry that matches wins.
my @SEARCH_ENGINES = (
    [ "Excite",                "excite.com",                   "search" ],
    [ "Excite",                "excite.com",                   "FI_1" ],
    [ "Excite",                "excite.com",                   "s" ],
    [ "AOL Search",            "netfind.aol.com",              "search" ],
    [ "AOL Search",            "netfind.aol.com/search.gw",    "s" ],
    [ "Metacrawler",           "metacrawler.com",              "general" ],
    [ "Infoseek",              "infoseek.com",                 "qt" ],
    [ "AltaVista",             "altavista.digital.com",        "q" ],
    [ "AltaVista",             "altavista.com",                "q" ],
    [ "AltaVista",             "altavista.com",                "r" ],
    [ "AltaVista @ Telia.com", "altavista.telia.com",          "q" ],
    [ "Search.Com",            "search.com",                   "QUERY" ],
    [ "Yahoo",                 "yahoo.com",                    "p" ],
    [ "Yahoo (Non-USA)",       "yahoo.",                       "p" ],
    [ "DogPile",               "dogpile.com",                  "q" ],
    [ "HotBot",                "hotbot.com",                   "MT" ],
    [ "Lycos",                 "lycos.com",                    "query" ],
    [ "Lycos",                 "lycos.",                       "query" ],
    [ "Webcrawler",            "webcrawler.com",               "searchText" ],
    [ "Webcrawler",            "webcrawler.com",               "search" ],
    [ "Inference Find",        "inference.com",                "query" ],
    [ "Anzwers",               "anzwers.ozemail.net",          "MT" ],
    [ "GoTo.com",              "goto.com",                     "Keywords" ],
    [ "Highway 61",            "highway61.com",                "string" ],
    [ "Yellow Pages",          "altavista.yellowpages.com.au", "q" ],
    [ "LookSmart",             "looksmart.com",                "key" ],
    [ "Snap",                  "snap.com",                     "keyword" ],
    [ "Ask Jeeves",            "askjeeves.com",                "ask" ],
    [ "Ask Jeeves",            "askjeeves.com",                "MetaTopic" ],
    [ "Excite at Netscape",    "excitesearch.netscape.com",    "search" ],
    [ "Excite at Netscape",    "excitesearch.netscape.com",    "s" ],
    [ "Developer.com",         "developer.com",                "search" ],
    [ "Magellan",              "mckinley.com",                 "search" ],
    [ "1Blink",                "1blink.com",                   "q" ],
    [ "Go2Net",                "go2net.com",                   "general" ],
    [ "Go.com",                "go.com",                       "qt" ],
    [ "MSN",                   "msn.com",                      "MT" ],
    [ "HotMail Email Message", "hotmail.com",                  "" ],
    [ "Ask Jeeves",            "ask.com",                      "ask" ],
    [ "Excite (UK)",           "excite.co.uk",                 "search" ],
);

# Escape text for safe inclusion in HTML element content or a quoted
# attribute.  Returns '' for undef.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# ReadLog($file)
#
# Parses the last $tail lines of $file and fills %sessions, %first, %agent,
# @badlines and $lineerrors.  Requests whose path contains ".gif" are
# skipped.  On failure to read the log, $open_error is set and whatever was
# parsed so far is kept.
sub ReadLog {
    my ($file) = @_;

    $lineerrors = 0;

    # List-form pipe open: no shell is involved, so neither the line count
    # nor the file name can be interpreted as shell syntax.
    my $fh;
    unless (open($fh, '-|', 'tail', '-n', $tail, $file)) {
        $open_error = "ERROR: Couldn't open the log file: $file";
        return;
    }

  LOGLOOP:
    while (my $entry = <$fh>) {
        chomp $entry;

        my ($site, undef, undef, $when, $request, $status, undef,
            $referer, $agent_string) = $entry =~ $LOG_LINE_RE;
        unless (defined $site) {
            $lineerrors++;
            push(@badlines, $entry);
            next LOGLOOP;
        }

        # Requested path: everything from the first '/' of the request line
        # up to the next whitespace.
        my ($page) = ($request =~ m|[^/]*(\S*)|);
        next LOGLOOP if ($page =~ /\.gif/);    # skip the gifs

        $site = lc $site;

        # get all the time info
        my ($date, $month, $year, $hour, $min) =
            ($when =~ m|^(\d\d)/(\w\w\w)/(\d{4}):(\d\d):(\d\d):\d\d|);
        unless (defined $min && exists $MONTHS{$month}) {
            $lineerrors++;
            push(@badlines, $entry);
            next LOGLOOP;
        }
        my $month_number = $MONTHS{$month};

        # Full four-digit year keeps the numeric sort in the report correct
        # across a change of century.
        my $datestamp = $year . $month_number . $date . $hour . $min;
        $first{$site} = $datestamp unless exists $first{$site};

        # Drop our own domain from the front of the referer so that internal
        # links show up as bare paths.
        $referer =~ s/^\Q$local\E//;

        # Record the browser info
        $agent{$site} = $agent_string;

        # Update the array for site.
        # NOTE(review): the original date/time separator read `\ \;`,
        # presumably a mangled non-breaking space entity -- confirm.
        push(
            @{ $sessions{$site} },
            {
                date   => "$month_number/$date&nbsp;$hour:$min",
                page   => $page,
                status => $status,
                from   => $referer,
            }
        );
    }

    # tail(1) reports an unreadable file through its exit status, which
    # surfaces when the pipe is closed.
    unless (close($fh)) {
        $open_error = "ERROR: Couldn't open the log file: $file";
    }
    return;
}

# search_words($name, $url, $string)
#
# Tests the global $from (a raw referer URL) against one search engine:
# $url must occur in the referer and the query string must carry a parameter
# called $string.  On a match, $from is replaced by ready-to-print HTML of
# the form "[Q] $name: <search words>" (the [Q] links back to the referer
# when it is an http/https URL) and the global $found is set.  Does nothing
# when an earlier call already matched.
sub search_words {
    return if ($found);
    my ($name, $url, $string) = @_;

    return unless $from =~ /\Q$url\E/i;
    return unless $from =~ /[?&]\Q$string\E=/;

    my $referer = $from;

    # Greedy prefix: when the parameter occurs more than once, use the last.
    my ($words) = ($referer =~ /.*[?&]\Q$string\E=([^&]*)/s);
    $words =~ tr/+/ /;
    $words =~ s/%([0-9A-Fa-f]{2})/pack("C", hex($1))/ge;

    # The referer is supplied by the client; only link to it when it is a
    # plain web URL.
    my $marker = '[Q]';
    if ($referer =~ m|^https?://|i) {
        $marker = '<a href="' . html_escape($referer) . '">[Q]</a>';
    }

    $from  = $marker . ' ' . html_escape("$name: $words");
    $found = 1;
    return;
}

ReadLog($log);

print <<'END';
<html>
<head><title>Access Analysis</title></head>
<body>
<h1>Access Analysis</h1>
END

if ($open_error ne '') {
    print "<p>", html_escape($open_error), "</p>\n";
}

if ($lineerrors > 0) {
    print <<"END";
<p>$lineerrors lines were ignored. Are you sure your log file is in the Extended Log Format?<br>
An ELF entry looks like this example:<br>
<tt>host.domain.com - - [10/Jul/1999:12:34:56 -0500] "GET / HTTP/1.1" 200 2376 "http://referer.com" "Browser Name"</tt></p>

<p>The following lines were ignored:</p>
<pre>
END
    foreach my $bad (@badlines) {
        print html_escape($bad), "\n";
    }
    print "</pre>\n\n";
}

# Hosts whose first hit in the window is most recent come first.
foreach my $site (sort { $first{$b} <=> $first{$a} } keys %sessions) {
    my $site_html  = html_escape($site);
    my $agent_html = html_escape($agent{$site});

    print <<"END";
<h2>$site_html - $agent_html</h2>
<table border="1">
<tr><th>Date</th><th>Page</th><th>Status</th><th>Referer</th></tr>
END

    foreach my $hit (@{ $sessions{$site} }) {
        my $date   = $hit->{date};
        my $page   = $hit->{page};
        my $status = $hit->{status};

        # Search keywords
        $from  = $hit->{from};
        $found = 0;
        if ($from =~ /\?/) {    # Query results?
            foreach my $engine (@SEARCH_ENGINES) {
                search_words(@$engine);
                last if $found;
            }
        }
        unless ($found) {
            my $from_html = html_escape($from);
            if ($from =~ /^http/) {
                $from = '<a href="' . $from_html . '">' . $from_html . '</a>';
            }
            else {
                $from = $from_html;
            }
        }

        # Modify based on content-type
        # NOTE(review): the original wrapped these values in markup that has
        # been lost; the emphasis tags here are placeholders.
        my $page_html = html_escape($page);
        if ($page =~ /\.zip$/) {
            $page_html = "<strong>" . $page_html . "</strong>";
        }
        if ($page =~ /\.jpg$/) {
            $page_html = "<em>" . $page_html . "</em>";
        }

        my $status_html = html_escape($status);
        if (($status !~ /^2/) && ($status !~ /^304/)) {
            $status_html = "<strong>$status_html</strong>";
        }

        print <<"END";
<tr><td>$date</td><td>$page_html</td><td>$status_html</td><td>$from</td></tr>
END
    }

    print "</table>\n\n";
}

print "</body>\n</html>\n\n";