weblint++¸˜›¸˜œmBIN‚Eu#!/usr/bin/perl -w # $Id: weblint++,v 1.2 2002/02/19 21:28:28 comdog Exp $ use strict; =head1 NAME weblint++ =head1 SYNOPSIS weblint++ [-v [level] ] [-m [md5 digest] ] [-d file] [-u username -p password] url =head1 DESCRIPTION The C program fetches a web resource and runs the response through an HTML lint filter as well as other tests. You can use this program interactively if you specify the C<-v> switch, or use it in batch mode by observing the exit status. =head1 OPTIONS =over4 =item -d file The C<-d> switch performs a diff between the HTTP response message body and the specified file. The program exits if they differ. =item -m [md5 digest] The C<-m> switch by itself reports the MD5 digest (in hex) of the message body of the request from URL. The program exits if the digests do not match. =item -p password The C<-p> switch specifies the Basic authentication password. =item -s file The C<-s> switch specifies the file to save the HTTP message body to. =item -u username The C<-u> switch specifies the Basic authentication user name. =item -v [level] The C<-v> switch turns on verbose reporting. The greater the value of C, the more verbose the reporting. If you do not specify C<-v>, you will see no output, although you can observe the results from the exit status. =back =head1 ORDER OF TESTS The program performs the tests, and possibly exits based on errors, in this order: HTTP fetch MD5 digest comparison ( C<-m> switch ) File content comparison ( C<-d> switch ) HTML Lint warnings =head1 EXIT STATUSES =over 4 =item -1 The MD5 digest of the HTTP response message body did not match the digest specified with C<-m>, if you specified one. =item -2 The file specified with the C<-d> switch does not exist. =item -3 The HTTP response message body differed from the content of the file specified with <-d>. =item < 0 The program encountered HTTP error. The exit code is the HTTP response code negated. If the HTTP response was 404 (Not Found), the exit status is -404. =item > 0 C found HTML errors. The exit status is the number of HTML errors. =item 0 Success. No HTTP errors, no MD5 digest mismatches, no HTML warnings. =back =head1 TO DO =head1 AUTHOR brian d foy =head1 COPYRIGHT Copyright 2000, brian d foy. All rights reserved. =head1 LICENSE This program may be redistributed under the same turns as Perl itself. =head1 SCRIPT CATEGORIES Web =head1 SEE ALSO L =cut use vars qw( %opts $VERBOSE ); require 5.6.0; use HTML::Lint; require LWP::UserAgent; require HTTP::Request; require URI; my $url = URI->new( pop @ARGV ); die "[$url] is not a valid URI\n" unless ref $url; # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # command line argument processing while( my $arg = shift @ARGV ) { unless( $arg =~ m/^-(.)/ ) { shift @ARGV; next; } my $letter = $1; if( $ARGV[0] =~ m/^-/ or not @ARGV ) { $opts{$letter} = 1; next; } $opts{$letter} = shift @ARGV; } $VERBOSE = $opts{v}; # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # HTTP objects my $user_agent = LWP::UserAgent->new; my $request = HTTP::Request->new( GET => $url ); $request->authorization_basic( $opts{u}, $opts{p} ) if( exists $opts{u} and exists $opts{p} ); print $request->as_string if $VERBOSE > 1; my $response = $user_agent->request( $request ); # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # the tests if( $response->is_success ) { print $response->as_string if $VERBOSE > 1; my $data = $response->content; # # # save the data if( exists $opts{'s'} and $opts{'s'} ne '1' ) { if( open(FILE, "> $opts{'s'}") ) { print FILE $data; close FILE; } else { print STDERR "Could not open $opts{s} for writing: $!\n"; } } # # # MD5 differences if( exists $opts{'m'} and $opts{'m'} ) { require Digest::MD5; my $digest = Digest::MD5::md5_hex( $data ); print "MD5 digest (hex) $digest\n" if( exists $opts{'m'} and $VERBOSE ); if( $opts{'m'} ne 1 and $opts{'m'} ne $digest ) { print "MD5 digests do not match!\n", "Expected [$opts{m}] got [$digest]\n" if $VERBOSE; exit -1; } } # # # File differences if( exists $opts{'d'} and $opts{'d'} and -e $opts{'d'} ) { require Text::Diff; my $diff = Text::Diff::diff( $opts{'d'}, \$data ); unless( $diff eq '0' ) { print "Files are different\n$diff\n" if $VERBOSE; exit -3; } print "Response is same as $opts{d}\n" if $VERBOSE; } elsif( exists $opts{'d'} and $opts{'d'} and not -e $opts{'d'} ) { print STDERR "File $opts{'d'} does not exist\n" if $VERBOSE; exit -2; } my $lint = HTML::Lint->new(); $lint->parse( $data ); my $errors = $lint->errors(); exit 0 unless $errors; if( $VERBOSE ) { foreach my $error ( $lint->errors() ) { print $error->as_string(), "\n"; } } exit $errors; } else { print STDERR "Could not fetch resource [", $response->code, "]" if $VERBOSE; exit -( $response->code ); }