#!/usr/bin/perl -w $CHILDREN = 40; # Number of children to spawn $TIMEOUT = 30; # DNS timeout $FLUSH = 3000; # Flush buffer every $FLUSH lines $DEBUG = 0; # ip2host v0.04 - Resolve IPs to hostnames in web server logs # Maurice Aubrey # # $Id: ip2host,v 1.1.1.4 2000/04/14 12:33:41 maurice Exp $ # # CHANGES: # # 0.05 Fri Apr 14 05:31:38 PDT 2000 # - Add POD to allow inclusion in CPAN # # 0.04 Mon Nov 22 17:54:07 PST 1999 # - Check socketpair() return value # - Updated documentation # # 0.03 Thu Nov 18 16:57:53 PST 1999 # - Renamed $BUFFER to $FLUSH # - Improved documentation # # 0.02 Sat Oct 16 00:05:29 PDT 1999 # - Initial public release use strict; use vars qw( $CHILDREN $TIMEOUT $FLUSH $DEBUG %Buffer $Next_Line %Cache ); use Socket; use IO::Handle; use IO::Select; my $cache_file = shift @ARGV; if ($cache_file) { # Cache results to disk if asked require DB_File; tie %Cache, 'DB_File', $cache_file or die "unable to tie '$cache_file': $!"; } # Write as many lines as we can until we come across one # that's missing (that means it's still pending DNS). sub flush_buffer { for (; exists $Buffer{ $Next_Line }; $Next_Line++) { print delete $Buffer{ $Next_Line }; } } # Spawn the children my $read_select = new IO::Select; my $write_select = new IO::Select; for(my $child = 1; $child <= $CHILDREN; $child++) { my($child_fh, $parent_fh) = (new IO::Handle, new IO::Handle); socketpair($child_fh, $parent_fh, AF_UNIX, SOCK_STREAM, PF_UNSPEC) or die "socketpair failed: $!"; $child_fh->autoflush; $parent_fh->autoflush; if (my $pid = fork) { close $parent_fh; $write_select->add( $child_fh ); # Start out writing to all children } else { # Child starts here die "cannot fork: $!" unless defined $pid; close $child_fh; close STDIN; close STDOUT; $SIG{'ALRM'} = sub { die 'alarmed' }; while(defined(my $ip = <$parent_fh>)) { # Get IP to resolve chomp($ip); my $host = undef; eval { # Try to resolve, but give up after $TIMEOUT seconds alarm( $TIMEOUT ); my $ip_struct = inet_aton $ip; $host = gethostbyaddr $ip_struct, AF_INET; alarm(0); }; # XXX Debug if ($DEBUG and $@ =~ /alarm/) { $host ||= 'TIMEOUT'; # print STDERR "Alarming ($ip)...\n"; } $host ||= $ip; print $parent_fh "$ip $host\n"; } exit 0; } } $Next_Line = 1; my $lineno = 0; my %pending = (); while(1) { # XXX Debug # print STDERR "buff[", scalar keys %Buffer, "] pend[", scalar keys %pending, # "] cache[", scalar keys %Cache, "]\n"; my($readable, $writable) = IO::Select->select( $read_select, $write_select, undef ); if (@$writable) { # One or more children ready for an IP my $line = ''; while(@$writable and defined($line = )) { my($ip, $rest) = split / /, $line, 2; flush_buffer if ++$lineno % $FLUSH == 0; if (exists $Cache{ $ip }) { # We found this answer already $Buffer{ $lineno } = "$Cache{ $ip } $rest"; } elsif (exists $pending{ $ip }) { # We're still looking push @{ $pending{ $ip } }, [ $lineno, $rest ]; } else { # Send IP to child my $write_fh = shift @$writable; print $write_fh "$ip\n"; $pending{ $ip } = [ [ $lineno, $rest ] ]; $write_select->remove( $write_fh ); # Move to read set $read_select->add( $write_fh ); } } defined $line or undef $write_select; # Are we done with input? } while (@$readable) { # One or more children have an answer my $read_fh = shift @$readable; my $str = <$read_fh>; chomp($str); my($ip, $host) = split / /, $str, 2; $Cache{ $ip } = $host; # Take all the lines that were pending for this IP and # toss them into the output buffer foreach my $pending (@{ $pending{ $ip } }) { $Buffer{ $pending->[0] } = "$host $pending->[1]"; } delete $pending{ $ip }; $read_select->remove( $read_fh ); # Move to write set $write_select->add( $read_fh ) if defined $write_select; } last if not defined $write_select and not keys %pending; } flush_buffer; =pod =head1 NAME ip2host - Resolve IPs to hostnames in web server logs =head1 SYNOPSIS ip2host [cache_file] < infile > outfile infile - Web server log file. Any log format is acceptable, as long as each line begins with the remote client's IP address. outfile - Same as input file, but with all of the IPs resolved to hostnames. =head1 DESCRIPTION This script is a drop-in replacement for the logresolve.pl script distributed with the Apache web server. ip2host has the same basic design (fork children to handle the DNS resolution in parallel), but multiplexes the communication. This results in a significant speed improvement (approximately 10x faster), and the performance degrades more gracefully as the DNS timeout value ($TIMEOUT) is increased. This script is reported to work under Linux, FreeBSD, Solaris, Tru64, and IRIX. =head1 AUTHOR Maurice Aubrey Emaurice@hevanet.comE =head1 COPYRIGHT Copyright 1999-2000, Maurice Aubrey Emaurice@hevanet.comE. All rights reserved. This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =head1 README Drop-in replacement for the logresolve.pl script distributed with the Apache web server that's approximately 10x faster. =head1 SCRIPT CATEGORIES Web =cut