#!/usr/bin/perl

=head1 NAME

weblogtail.pl - Displays running HTTP logs to a terminal with pretty colors

=head1 SYNOPSIS

weblogtail.pl [-c configfile]

=head1 DESCRIPTION

Allows an admin to view multiple Apache HTTP logs, color coded for easy
viewability, as they're written. Also highlights incoming referrer strings
and has a built-in "grep -v" style mechanism to prevent information overload.

This script is specifically written for the "Combined" log format.

Logs are followed using B<File::Tail>, then parsed, munged, and displayed in
the ANSI color specified by the B<color> config option.

A sample line (note: the line measures 80 characters plus newline so it may
wrap on your display):

S<05:54 bob user-92.bhm2|sl.pol.co.uk /parties/electro/pages/CarolAlVideo.html 200>

Where:

=over

=item B<05:54>

hh:mm pulled from the log.

=item B<bob>

The username used in HTTP auth, or "-" if anonymous.

=item B<user-92.bhm2|sl.pol.co.uk>

A munged client hostname (note the pipe).

=item B</parties/electro/pages/CarolAlVideo.html>

The HTTP request. The method only shows when it's not a GET. The protocol is
discarded.

=item B<200>

The HTTP return code.

=back

The client hostname will be munged to no less than 23 characters (including
separator) and should always be missing an "even" middle +/- 1 character.
Get it?

If the munged client hostname is already at minimal size and the request is
still too long to fit it all in the allotted space, the request is then
truncated from the front so you see the tail end of it. Tack plaintext
descriptions with meaningless variables onto the end of long URL query strings
in your code.

If a request comes in with a referrer string that does not match the
B<refer> config option, the URL is printed in that log's color, bolded.

=head1 OPTIONS

The default config file location is ~/.weblogtailrc

=over

=item C<-c> B<configfile>

Read the configuration from B<configfile> instead of the default location.

=back

=head1 CONFIGURATION

The config file is straight perl because I couldn't think of an easier
human-readable format. The config file gets eval()'d after the defaults are
set so a user may override the $width and $tailcount settings if they so
choose. A config file that fails to compile or run is reported as a fatal
error.

The main config variable is B<%conf>.

    %conf = (
        '/path/to/apache/log/file' => {
            'refer'  => '(www|host4).example.org',
            'color'  => 'green',
            'ignore' => '\.(gif|jpg)$'
        }
    );

=over

=item B<refer>

A regex of this file's possible hostname(s) to be ignored in the referrer
tags. It is matched right after the leading C<http://> or C<https://> of the
referrer. If a hit comes in with a referrer not in this RE, a bolded line
will be printed to say so. Very cool for recognizing search engine hits in
real-time.

=item B<color>

Defines the display color for this log. See B<Term::ANSIColor> for different
options. Do not use bold here because it's used by the referrer routines.

=item B<ignore>

A regex applied negatively against the user request (GET /blah/blah/blah up
to " HTTP/1.[01]" exclusive) so your display doesn't get slammed by graphics
intensive requests for instance. For compatibility with older config files
the pattern is also tried against the complete log line.

=back

=head1 AUTHOR

Frank Johnson E<lt>ratty at they.orgE<gt>.
Copyright (c) 2003
Distributable under the Gnu GPL version 2

http://web.they.org/software/
http://web.they.org/software/weblogtail.pl
http://web.they.org/software/images/weblogview2.png

=head1 VERSION

weblogtail.pl v0.5

=cut

use strict;
use warnings;

use File::Tail;
use Term::ANSIColor;

$| = 1;    # Unbuffered output so lines appear as soon as they are logged.

# The munged hostname is never shortened below this many characters.
my $MIN_HOST_WIDTH = 23;

# Apache "Combined" log format. Captures, in order: host, ident, user,
# timestamp, request (without the protocol), status, size, referrer, agent.
my $LOG_LINE_RE =
    qr/^(\S+)\s+(\S+)\s+(\S+)\s+\[([^\]]+)\]\s+"(.*) HTTP\/1\.\d"\s+([0-9-]+)\s+([0-9-]+)\s+"(.*)"\s+"(.+)"$/;

# Defaults. The config file is eval()'d with these lexicals in scope, so it
# may assign to %conf, $width and $tailcount.
my $conffile  = ($ENV{'HOME'} || '') . "/.weblogtailrc";
my $tailcount = 10;
my $width     = 80;
my (@readset, @pending);
my %conf;

# Command line: only "-c configfile" is understood. Iterating on @ARGV's
# length (not on the truth of the shifted value) keeps an argument of "0"
# from silently ending option parsing.
while (@ARGV) {
    my $arg = shift @ARGV;
    if ($arg eq '-c') {
        $conffile = shift @ARGV;
        die "$0: -c requires a filename arg.\n"
            if (!defined $conffile || $conffile eq '');
    }
    else {
        die "$0: invalid arg '$arg'\n";
    }
}

readconf($conffile, $tailcount);

# Main loop: wait (at most a second) for any followed log to have input,
# then display what is pending.
while (1) {
    (undef, undef, @pending) =
        File::Tail::select(undef, undef, undef, 1, @readset);

    if (!grep { $_ } @pending) {
        sleep 1;
        next;
    }

    # @pending is consumed as a work queue rather than iterated with foreach,
    # because a log with more input waiting is pushed back onto the front.
    while (@pending) {
        my $tail = shift @pending;
        next if (!$tail);

        my $line = $tail->read();
        next if (!defined $line);
        chomp $line;

        my $shown = show_line($tail, $line);

        # If there's more text already, might as well suck all we can right
        # now. Only done after a displayed line, as before.
        if ($shown && $tail->predict == 0) {
            unshift(@pending, $tail);
        }
    }
}

# show_line($tail, $line)
#
# Parses one log line read from the File::Tail object $tail and prints it in
# that log's configured color, preceded by a bold "Referrer:" line when the
# referrer is not one of the log's own hostnames.
#
# Returns 1 if the line was displayed, 0 if it was unparsable (reported as
# "Bad Line") or suppressed by the log's 'ignore' pattern.
sub show_line {
    my ($tail, $line) = @_;

    my ($host, $ident, $user, $time, $query, $status, $size, $referrer, $agent)
        = $line =~ $LOG_LINE_RE;

    if (!defined $host) {
        print "Bad Line: $line\n";
        return 0;
    }

    my $logconf = $conf{ $tail->input } || {};

    # 'ignore' is documented as applying to the request ("GET /path"); the
    # whole line is tried too so patterns written against it keep working.
    # An empty pattern is skipped: in Perl an empty regex silently means
    # "the last successful pattern", which would suppress every line.
    my $ignore = $logconf->{'ignore'};
    if (defined $ignore && $ignore ne '') {
        return 0 if ($query =~ /$ignore/ || $line =~ /$ignore/);
    }

    # Reduce "dd/Mon/yyyy:hh:mm:ss +zone" to "hh:mm".
    $time =~ s#^\d+/\w+/\d+:(\d\d:\d\d):\d\d .*$#$1#;

    # Show the method only when it is not GET; drop any protocol remnant.
    $query =~ s/^GET (.*)$/$1/;
    $query =~ s/^(.*) HTTP\/1\.[01]$/$1/;

    ## I put in some short-circuit matches here that are common to many/all
    ## of my web domains. You may wish to make your own.
    # return 0 if ((split m{/}, $query)[-1] =~ /\?fil=th_/);
    # return 0 if ($query =~ /thumbs\/th_/);
    # return 0 if ($query =~ /\?th=1&/);
    # return 0 if ($query =~ /^\/default.ida/);

    my $own_site = defined $logconf->{'refer'} ? $logconf->{'refer'} : '';
    my $color    = defined $logconf->{'color'} ? $logconf->{'color'} : '';

    # The refer regex is grouped so a top-level alternation in it stays
    # anchored behind the scheme; https referrers count as well as http.
    if ($referrer ne '-' && $referrer !~ m#^https?://(?:$own_site)#i) {
        print paint("Referrer: $referrer\n", "bold $color");
    }

    ($host, $query) =
        fit_to_width($time, $host, $user, $query, $status, $width);

    print paint("$time $user $host $query $status\n", $color);
    return 1;
}

# fit_to_width($time, $host, $user, $query, $status, $width)
#
# Shortens $host and then $query until the displayed line
# ("$time $user $host $query $status") fits in $width characters.
#
# The host is shortened first: one middle character is replaced by "|" and
# characters are then removed alternately from after and before the pipe,
# down to $MIN_HOST_WIDTH characters. After that the request loses characters
# from the front so its tail stays visible.
#
# Returns the (possibly shortened) list ($host, $query). If the line still
# does not fit when the request is down to one character, it is returned as
# is rather than looping forever.
sub fit_to_width {
    my ($time, $host, $user, $query, $status, $width) = @_;

    my $trim_before_pipe = 0;
    while (length("$time $host $user $query $status") > $width) {
        if (length($host) > $MIN_HOST_WIDTH) {
            if ($host =~ /\|/) {
                if ($trim_before_pipe) {
                    $host =~ s/^(.*).\|(.*)$/$1|$2/;
                }
                else {
                    $host =~ s/^(.*)\|.(.*)$/$1|$2/;
                }
                $trim_before_pipe = !$trim_before_pipe;
            }
            else {
                # First pass: mark the middle of the hostname with a pipe.
                my $half = int(length($host) / 2);
                $host =~ s/^(.{$half}).(.*)$/$1|$2/;
            }
        }
        else {
            last if (length($query) <= 1);    # nothing left to give up
            substr($query, 0, 1, '');
        }
    }

    return ($host, $query);
}

# paint($text, $attrs)
#
# Returns $text wrapped in the ANSI attributes named in $attrs, or $text
# unchanged when $attrs names no attribute (e.g. a log with no 'color').
sub paint {
    my ($text, $attrs) = @_;

    return $text if ($attrs !~ /\S/);
    return colored($text, $attrs);
}

# readconf($conffile, $tailcount)
#
# Evaluates the Perl config file $conffile and creates one File::Tail object
# per key of %conf, appending them to @readset. $tailcount is the default
# number of existing lines to show per log; the config file may override it,
# as well as $width, by assigning to those variables.
#
# Dies if the file is missing or unreadable, fails to evaluate, leaves $width
# unusable, or defines no logs.
sub readconf {
    my ($conffile, $tailcount) = @_;

    die "$0: conffile '$conffile' not found or unreadable.\n"
        if (!(-f $conffile && -r $conffile));

    # Read the file directly instead of through `cat`, so the filename never
    # passes through a shell.
    open(my $fh, '<', $conffile)
        or die "$0: cannot open conffile '$conffile': $!\n";
    my $code = do { local $/; <$fh> };
    close($fh);

    # NOTE: the config file is executed as Perl code, by design. Only point
    # this script at a config file you trust.
    eval $code;
    die "$0: error in conffile '$conffile': $@" if ($@);

    die "$0: \$width must be a positive integer (conffile '$conffile').\n"
        if (!defined $width || $width !~ /^\d+$/ || $width < 1);
    die "$0: no log files defined in \%conf (conffile '$conffile').\n"
        if (!%conf);

    foreach my $logfile (keys(%conf)) {
        push(@readset, File::Tail->new(name => $logfile, tail => $tailcount));
    }
    return;
}