find_status.pl -t10 200 ~www/logs/access_log
TOP 10 URLS/HOSTS WITH STATUS CODE 200:
REQUESTS URL/HOST
-------- --------
1845 /www/wilogo.gif
1597 /cgi-bin/contig/sts_by_name?database=release
1582 /WWW/faqs/www-security-faq.html
1263 /icons/caution.xbm
930 /
886 /ftp/pub/software/WWW/cgi_docs.html
773 /cgi-bin/contig/phys_map
713 /icons/dna.gif
686 /WWW/pics/small_awlogo.gif
Code  Message         Description
200   OK              The URL was found. Its contents follow.
301   Moved           The URL has permanently moved to a new location.
302   Found           The URL can be temporarily found at a new location.
304   Not Modified    The URL has not been modified since the indicated date.
400   Bad Request     Syntax error in the request.
401   Unauthorized    Used in authorization schemes.
403   Forbidden       This URL is forbidden, and authorization won't help.
404   Not Found       It isn't here.
500   Internal Error  The server encountered an unexpected error.
#!/usr/local/bin/perl
# File: find_status.pl
#
# Command-line parsing for the status-code summary script.
#
# Recognized switches (see the usage text below):
#   -L <domain>   sets $opt_L
#   -t <integer>  sets $opt_t
#   -h            sets $opt_h
# Parsed switches are removed from @ARGV; the remaining arguments (status
# codes followed by log file names) are left in place for later code.
#
# The Perl 4 library "getopts.pl" was removed from the core distribution in
# Perl 5.16, so requiring it aborts the script on current perls.
# Getopt::Std::getopts accepts the same switch specification and, when
# called with a single argument, sets the same $opt_X package variables.
use Getopt::Std;

# getopts returns false when it meets an unknown switch; show the usage then.
getopts('L:t:h') || die <<USAGE;
Usage: find_status.pl [-Lth] <code1> <code2> <code3> ...
Scan Web server log files and list a summary
of URLs whose requests had the one of the
indicated status codes.
Options:
-L <domain> Ignore local hosts matching this domain
-t <integer> Print top integer URLS/HOSTS [10]
-h Sort by host rather than URL
USAGE
;
# Turn the parsed switches into the settings used by the rest of the script
# and collect the status codes to report on.
#
# Reads:  $opt_L, $opt_t (set by option parsing) and @ARGV.
# Sets:   $IGNORE - regex source for hosts to skip (only when -L is given)
#         $TOP    - maximum number of rows printed per status code
#         %CODES  - keys are the status codes named on the command line
# Leading all-digit arguments are removed from @ARGV; whatever follows is
# left there to be read as log files by the <> operator.
if ($opt_L) {
    # Escape every regex metacharacter in the domain, not just the dots, so
    # the value is always matched literally.
    my $domain = quotemeta $opt_L;
    # Skip hosts with no dot at all (unqualified names) or ending in the
    # given domain.
    $IGNORE = "(^[^.]+|$domain)\$";
}

# A non-numeric -t would make the row countdown start below zero and never
# stop the listing, so reject it up front.
if (defined $opt_t && $opt_t !~ /^\d+$/) {
    die "find_status.pl: -t expects an integer, got '$opt_t'\n";
}
$TOP = $opt_t || 10;

while (@ARGV) {
    last unless $ARGV[0] =~ /^\d+$/;
    my $code = shift @ARGV;
    $CODES{$code}++;
}

# Without any status code every log line would be discarded and the script
# would read all of its input only to print nothing.
die "find_status.pl: no status codes given\n" unless %CODES;
# Read every log line from the files left in @ARGV (or from STDIN) and tally
# the requests whose status code was asked for.
#
# The pattern captures, in order: host, rfc931 identity, user, date,
# request method, URL, status code and byte count.  Only the host, URL and
# status code are needed here.
#
# Result: $found{<status>}->{<host or URL>} holds the number of matching
# requests; the inner key is the host when -h was given, otherwise the URL.
while (my $line = <>) {
    my @fields = $line =~
        /^(\S+) (\S+) (\S+) \[([^]]+)\] "(\w+) (\S+).*" (\d+) (\S+)/;

    # Lines that do not look like a log entry carry no status code.
    next unless @fields;

    my ($client, $url, $code) = @fields[0, 5, 6];

    next unless $CODES{$code};
    next if $IGNORE && $client =~ /$IGNORE/io;

    my $key = $opt_h ? $client : $url;
    $found{$code}->{$key}++;
}
# Print one report per requested status code, in ascending numeric order.
# Each report lists at most $TOP hosts/URLs, most frequently requested first.
#
# $status, $info and $i are deliberately package variables: the STDOUT and
# STDOUT_TOP formats read them when write() is called.
foreach $status (sort { $a <=> $b } keys %CODES) {
    $info = $found{$status};

    # Nothing was logged with this status: there is nothing to list.
    next unless $info;

    $count = $TOP;
    # Break ties on the name so that entries with equal counts come out in
    # the same order on every run instead of following hash order.
    foreach $i (sort { $info->{$b} <=> $info->{$a} || $a cmp $b }
                keys %{$info}) {
        write;
        last unless --$count;
    }
    $- = 0; # force a new top-of-report
}
# Report header for STDOUT, emitted by write() at the top of each report.
# The two @## fields are filled from $TOP (requested number of rows) and
# $status (the status code currently being reported).
format STDOUT_TOP=
TOP @## URLS/HOSTS WITH STATUS CODE @##:
$TOP, $status
REQUESTS URL/HOST
-------- --------
.
# Detail line for STDOUT, emitted once per write().
# @##### is a numeric field holding the request count $info->{$i};
# the @<<<... field is left-justified text holding the host or URL $i.
format STDOUT=
@##### @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$info->{$i},$i
.
|
|
| Contents | Next |