Logging to a Relational Database

On-the-fly logging to a relational database lets you do ad hoc queries.

Database Schema

   CREATE TABLE access_log (
	when    datetime     not null,
	host	varchar(255) not null,
	method  char(4)      not null,
	url	varchar(255) not null,
	auth	varchar(50),
	browser varchar(50),
	referer varchar(255),
	status  smallint(3)  not null,
	bytes	int(8)       default 0,
        key(host(30)),key(url(30)),key(status)
 );

Custom Log Entries in httpd.conf

 LogFormat "\"%{%Y-%m-%d %H:%M:%S}t\" %h \"%r\" %u \"%{User-agent}i\" %{Referer}i %s %b" mysql
 CustomLog "| /usr/local/apache/bin/mysqllog" mysql

Script I.1.9: Database Logging Script

  #!/usr/local/bin/perl
 # script: mysqllog
 use DBI;

 use constant DSN       => 'dbi:mysql:www';
 use constant DB_TABLE  => 'access_log';
 use constant DB_USER   => 'nobody';
 use constant DB_PASSWD => '';

 $PATTERN = '"([^"]+)" (\S+) "(\S+) (\S+) [^"]+" (\S+) "([^"]+)" (\S+) (\d+) (\S+)';

 $db = DBI->connect(DSN,DB_USER,DB_PASSWD) || die DBI->errstr;
 $sth = $db->prepare("INSERT INTO ${\DB_TABLE} VALUES(?,?,?,?,?,?,?,?,?)") 
     || die $db->errstr;
 while (<>) {
     chomp;
     my($date,$host,$method,$url,$user,
        $browser,$referer,$status,$bytes) = /$PATTERN/o;
     $user    = undef if $user    eq '-';
     $referer = undef if $referer eq '-';
     $browser = undef if $browser eq '-';
     $bytes   = undef if $bytes   eq '-';
     $sth->execute($date,$host,$method,$url,$user,
                      $browser,$referer,$status,$bytes);
 }
 $sth->finish;
 $db->disconnect;

Handy Queries

How many hits have we had?
SELECT count(*) from access_log;
How many total bytes transferred?
SELECT sum(bytes) from access_log;
How many days have we been running?
SELECT to_days(max(when))-to_days(min(when))
      from access_log
How many hits, sorted by day of the week?
SELECT date_format(when,'W') as weekday,count(*) 
        from access_log group by weekday;
How many hits, sorted by hour of day?
SELECT date_format(when,'H') as hour,count(*) 
        from access_log group by hour;
What is our bandwidth, sorted by hour of day?
SELECT date_format(when,'H') as hour,
      sum(bytes)/(60*60) as bytes_per_min
      from access_log group by hour;
What are the top ten most popular URLs?
SELECT url,count(*) as count from access_log 
        group by url order by count desc limit 10;
What URLs are broken? (404 "not found")
SELECT url,count(url) from access_log 
       where status=404 group by url;
Who is linking to /elegans.html?
SELECT left(referer,60),count(*) as cnt from access_log
      where url='/WWW/software/CGI/cgi_docs.html'
      group by referer order by cnt desc;

<< Previous Contents >> Next >>

Lincoln D. Stein, lstein@cshl.org
Cold Spring Harbor Laboratory
Last modified: Mon Aug 17 10:43:10 EDT 1998