#! /usr/bin/perl eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if 0; # not running under some shell use 5.0008; use strict; use warnings; use Carp qw( croak ); use Config; use English qw( -no_match_vars ); use File::Basename; use File::Find; use File::Path; use File::Slurp; use File::Spec; use File::Spec::Unix; use GDBM_File; use Getopt::Long; use HTML::EasyTags; use IO::File; use IO::Handle; use MIME::Base64 qw( decode_base64 ); use Pod::HtmlEasy 0.09; use Readonly; use Regexp::Common qw{ whitespace }; use Storable qw( store retrieve ); our $VERSION = 0.600; # Also appears in "=head1 VERSION" in the POD below # Static configuration Readonly my $debug => 0; Readonly my $DOT => q{.}; Readonly my $EMPTY => q{}; Readonly my $NL => qq{\n}; Readonly my $NUL => qq{\0}; Readonly my $me => basename($PROGRAM_NAME); Readonly my $page_cols => 2; Readonly my @suffixes => qw{ \.pm \.pod \.pl }; Readonly my $title => q{Perl Documentation}; Readonly my $user => q{root\@localhost}; # Variables that may be set from the command line my $dumpdb; my $help; my $scratch; my $verbose; # Implied by the presence of files in @ARGV my $local_files; # Longest first, otherwise alpha my @prefixes = reverse sort { length $a <=> length $b or $a cmp $b } @INC; if ( $prefixes[-1] eq $DOT ) { pop @prefixes; } # For the source directory, take the shortest path in @INC and # eliminate the last directory component. With luck, this will # give you /usr/lib/perl5 (or equivalent) for a standard installation. my $sourcedir = dirname( $prefixes[-1] ); my $targetdir = q{/usr/local/doc/HTML/Perl}; my @addpods = qw{ /usr/local/doc/POD }; # Adds on to @INC if ($debug) { # To debug in a smaller environment, use these. # Note that if you should use something like # /usr/lib/perl5/vendor_perl/5.8.8/Regexp # in order to get a small-sample "live" test, the links will be # CPAN searches as the last directory will be lost, resulting in # failure of the inverted hash lookup in on_L(). $targetdir = qq{$ENV{HOME}/Perl/pod2_test_output}; $sourcedir = qq{$ENV{HOME}/Perl/pod2_test_input}; @addpods = (); @prefixes = ($sourcedir); } # Make a compiled regex. Readonly my $prefixer => q{\A} . join( q{/|\A}, @prefixes, @addpods ) . q{/}; Readonly my $prefixre => qr{$prefixer}; # Persistant hashes that track the state of PODs converted to HTML. # %html_track: # key: path to original (.pod, .pm or .pl) file # value: [ path to the .html file, last mod time of POD ] # %nopod_track: # key: path to original .pm or .pl file # value: the last-modified time of the .pm or .pl file # that was found to have no POD. # %index_track: # key: tag for the index (Foo::Bar) # value: ARRAY of [ path to the .html file, last mod time of POD ] # See insert_latest() for a specific example. my ( %html_track, %nopod_track, %index_track ); my ( $html_track_ref, $nopod_track_ref, $index_track_ref ) = ( \%html_track, \%nopod_track, \%index_track ); # %pod_convert: # key: path to the POD file to convert # value: path to the HTML file that will receive the conversion my (%pod_convert); # Files accessed in the course of index generation # Index tracking and nopod tracking files Readonly my $html_track => "$targetdir/.html_track"; Readonly my $index_track => "$targetdir/.index_track"; Readonly my $nopod_track => "$targetdir/.nopod_track"; # Default URL for referenced uninstalled PODs Readonly my $CPAN => q{search.cpan.org}; # HTML index for converted PODs Readonly my $index_file => "$targetdir/index.html"; # Stylesheet for generated HTML Readonly my $css => "$targetdir/.doc.css"; # Generate a css file. # Data for the css file is found at the end of this file sub if_verbose { if ( defined $verbose ) { print shift; } return; } my $css_data; sub get_css { local $INPUT_RECORD_SEPARATOR = $EMPTY; $css_data = ; return; } # Get the last modified time of the file arg. sub mtime { return ( stat shift )[9]; } # Execute the Storable module's retrieve() to load the persistent hashes. sub do_retrieve { if ( defined $scratch ) { # Hashes are not loaded. return; } if ( !-e $html_track ) { # Hash files are not found. return; } if_verbose("Retrieving $html_track$NL"); $html_track_ref = retrieve($html_track) or croak "Unable to retrieve $html_track - $!$NL"; if_verbose("Retrieving $index_track$NL"); $index_track_ref = retrieve($index_track) or croak "Unable to retrieve $index_track - $!$NL"; if_verbose("Retrieving $nopod_track$NL$NL"); $nopod_track_ref = retrieve($nopod_track) or croak "Unable to retrieve $nopod_track - $!$NL"; return; } # Execute the Storable module's store() to save the persistent hashes. sub do_store { store( $html_track_ref, $html_track ); store( $index_track_ref, $index_track ); store( $nopod_track_ref, $nopod_track ); return; } # Build HTML for the header/trailer that goes at the top and bottom of # generated HTML pages. sub build_header { my ( $name, $alt ) = @_; if ( not defined $name ) { $name = $alt; } my $header = <<"_HEAD_";
 $name
_HEAD_ return $header; } # Maintain the array of index targets with the latest date as element [0]. # If you wish to maintain the array sorted by date, use this: # return [ sort { $b->[1] <=> $a->[1] } @$arrayref, $htmlfile ] # Here's a sample of the hash. Note the nested ARRAYs. # HASH(0x930e668) # 'Item_error_Pod_Readme' => ARRAY(0x930f3ac) # 0 ARRAY(0x930f3c4) # 0 '/home/geoff/Perl/pod2_test_output/Item_error_Pod_Readme.html' # 1 1151020294 # 1 ARRAY(0x930f3f4) # 0 '/home/geoff/Perl/pod2_test_output2/Item_error_Pod_Readme.html' # 1 1150299856 # 'OLEwriter' => ARRAY(0x930f424) # 0 ARRAY(0x930f43c) # 0 '/home/geoff/Perl/pod2_test_output/OLEwriter.html' # 1 1150299856 sub insert_latest { my ( $arrayref, $htmlfile ) = @_; if ( not defined $arrayref ) { # First time for this htmlfile return [$htmlfile]; } # Multiple links for a particular tag. # The $htmlfile (ref to ARRAY) goes at the head of the list if # its date is later than the one currently there, and at the # back of the list otherwise. We're interested only in the head # of the list for link generation when this tag is referenced. $arrayref->[0][1] > $htmlfile->[1] ? push @{$arrayref}, $htmlfile : unshift @{$arrayref}, $htmlfile; return $arrayref; } sub usage { print <<"_USAGE_"; $me $VERSION [-dumpdb] Dump persistent files and exit. [-help] Print this and exit. [-scratch] Rebuild the persistent files, which re-creates all HTML. [-verbose] Babble. [path ...] Single-file POD conversion. $me $VERSION converts POD (.pm, .pl or .pod) files to HTML, and indexes them. The state of the HTML is tracked, and re-conversion is avoided if unnecessary. If path(s) are specified on the command line, indexing is skipped, and conversion takes place directly. /foo/bar/test.pm => ./test.pm.html _USAGE_ exit 0; } # Subroutines which replace the subs internal to pod2html # and associated global variables. # $HTML_file: # the file currently being generated, and is used for # links within the page. my ($HTML_file); sub on_L { my ( $this, $L, $text, $page, $section, $type ) = @_; if ( $type eq q{pod} ) { if ( not defined $text ) { return q{(undefined)}; } # Corrupt the href to avoid having it recognized (and converted) by _add_uri_href my $textc = $text; $textc =~ s{\A(.)}{$1$NUL}smx; if ( not defined $page ) { $page = $EMPTY; } if ( defined $section ) { $section =~ s/\s+/-/mxg; return exists $index_track_ref->{$page} ? qq{$textc} : qq{$textc}; } # $text is expected to contain Foo::Bar. # Returning a link to an HTML file in the conversion directory my $link = $index_track_ref->{$text}; return defined $link ? qq{$textc} : # Returning a search command that will (hopefully) locate POD for # a module that is not installed qq{$textc}; } # OK, not a POD. Try something else if ( $type eq q{man} ) { return qq{man $text}; } if ( $type eq q{url} ) { # Corrupt the href to avoid having it recognized (and converted) by _add_uri_href $page =~ s{\A(.)}{$1$NUL}smx; $text =~ s{\A(.)}{$1$NUL}smx; return qq{$text}; } print qq{Pod::HtmlEasy asked to process link type $type, }, q{but that's not supported.}; return $EMPTY; } my $podhtml = Pod::HtmlEasy->new( on_L => \&on_L, ); # This a "wanted" sub for File::Find. # It accepts files with extension .pod or with extension .pm or .pl and # which have a line that begins with "=\w+" and saves them in the # html_track hash. If they've been modified since last conversion, # or never converted, the file is stuffed into the pod_convert hash. # If a POD file is removed, it's entry will linger in the persistent files. sub list_pods { my $podfile = $File::Find::name; # Perl::Critic complains, but there's no sub interface my ( $name, $path, $suffix ) = fileparse( $podfile, @suffixes ); if ( length $suffix == 0 ) { return; } if_verbose(qq{$podfile$NL}); # Source podfile is assumed to exist. Otherwise, how did we get here? # $podmtime is the last-modified time of the .pm or .pod we're # considering. my $podmtime = mtime($podfile); # Check if there is anything to do. my $ftime = $nopod_track_ref->{$podfile}; if ( defined $ftime and ( $ftime >= $podmtime ) ) { # The podfile was previously examined and had no POD and # it has't been modified since. if_verbose(qq{ in .nopod_track$NL}); return; } # Now let's see if the podfile was previously converted and # whether its been modified since then. my $track = $html_track_ref->{$podfile}; if ( defined $track ) { # We've converted this POD before. my ( $htmlfile, $ftime ) = @{$track}; if ( -e $htmlfile and ( $ftime >= $podmtime ) ) { # The podfile was previously converted and it hasn't # been modified since, and the HTML version is still around. if_verbose(qq{ already converted $NL}); return; } } # .pod files should have POD; with .pm or .pl its problematic. # So, check the .pm or .pl file to see if there are any lines # beginning with "=" if ( $suffix ne q{.pod} ) { # Get the source and check # This is not foolproof. But its cheap. if ( not grep { m/^=\w+/mx } ${ read_file( $podfile, scalar_ref => 1 ) } ) { if_verbose(qq{ no POD$NL}); $nopod_track_ref->{$podfile} = $podmtime; return; } } # We have a POD to convert. The ti me is last-modified of POD. # %html_track has the state of the index database. # %pod_convert has the PODs that are to be converted this run. $path =~ s{$sourcedir}{$targetdir}mx; my $htmlfile = qq{$path$name.html}; $html_track_ref->{$podfile} = [ $htmlfile, $podmtime ]; $pod_convert{$htmlfile} = $podfile; if_verbose(qq{ to be converted$NL}); return; } # Convert a POD file to HTML sub convert_pod { my ( $pod_file, $html_file ) = @_; # $html_file is global as its needed by on_L() $HTML_file = $html_file; # Make an HTML file from the POD my ( $name, $path ) = fileparse($HTML_file); if ( !-d $path ) { mkpath($path); } print qq{$pod_file =>$NL $HTML_file$NL}; my @html = $podhtml->pod2html( $pod_file, css => defined $local_files ? $css_data : $css, top => 'uArr', ); my $heading = build_header( $podhtml->pm_name, $pod_file ); # Build header and footer for the page # This should be near the top of the page foreach my $html_line (@html) { $html_line =~ m{' ) or croak qq{$me: Can't open $index_file: $!$NL}; my $html = HTML::EasyTags->new(); $html->groups_by_default(1); # Do the HTML document header $fh->print( $html->start_html( $title, [ $html->link( rel => 'stylesheet', href => $css, type => 'text/css' ), $html->link( rev => 'made', href => "mailto:$user" ), ], ), $html->font_start( size => 5 ), $html->strong( $html->center($title) ), $html->font_end, $html->hr, $html->p, $html->table_start ); # Get set up to dump %index_track as links to the docs my @tags = sort { uc $a cmp uc $b } keys %{$index_track_ref}; my $items = @tags; my $nrows = int $items / $page_cols; my $item = 0; my $row = 0; # Create a for each row (reading across) of s while ( $row < $nrows ) { $fh->print( $html->tr_start ); foreach my $tag ( @tags[ $item .. $item + $page_cols - 1 ] ) { $fh->print( $html->td( [ $html->a( href => $index_track_ref->{$tag}[0][0], text => $tag ) ] ) ); } $fh->print( $html->tr_end ); $row++; $item += $page_cols; } # Last row might be not be full if ( $item < $items ) { $fh->print( $html->tr_start ); foreach my $tag ( @tags[ $item .. $items - 1 ] ) { $fh->print( $html->td( [ $html->a( href => $index_track_ref->{$tag}[0][0], text => $tag ) ] ) ); } $fh->print( $html->tr_end ); } $fh->print( $html->table_end, $html->end_html ); $fh->close(); return; } # Here's where it all begins if ( not GetOptions( 'dumpdb' => \$dumpdb, 'help' => \$help, 'scratch' => \$scratch, 'verbose' => \$verbose, ) ) { usage(); } if ( defined $help ) { usage(); } $SIG{INT} = sub { # Persistant hashes not updated. exit 1; }; *STDOUT->autoflush(); do_retrieve(); if ( defined $dumpdb ) { if ( defined $scratch ) { croak qq{Can't combine -scratch and -dumpdb!$NL}; } print qq{$html_track$NL}; for ( sort keys %{$html_track_ref} ) { print qq{ $_ =>$NL $html_track_ref->{$_}->[0]$NL }, scalar localtime $html_track_ref->{$_}->[1], $NL; } my @index_refs; print qq{$NL$index_track$NL}; for ( sort keys %{$index_track_ref} ) { print qq{ $_ =>$NL}; if ( int @{ $index_track_ref->{$_} } > 1 ) { push @index_refs, $_; } foreach my $ary_ref ( @{ $index_track_ref->{$_} } ) { print qq{ $ary_ref->[0]$NL }, scalar localtime $ary_ref->[1], $NL; } } print qq{$NL$NL$nopod_track$NL}; for ( sort keys %{$nopod_track_ref} ) { print qq{ $_ =>$NL }, scalar localtime $nopod_track_ref->{$_}, $NL; } if (@index_refs) { print qq{${NL}Multiple index references$NL}; foreach (@index_refs) { print qq{ $_ =>$NL}; foreach my $ary_ref ( @{ $index_track_ref->{$_} } ) { print qq{ $ary_ref->[0]$NL }, scalar localtime $ary_ref->[1], $NL; } } } exit 0; } get_css($css); if ( not defined $local_files ) { write_file( $css, \$css_data ); } if ( not -d $targetdir ) { mkdir $targetdir or croak "Unable to create $targetdir"; } if ( not -w $targetdir ) { croak "Unable to write $targetdir"; } if (@ARGV) { $verbose = 1; $local_files = 1; foreach my $pod_file (@ARGV) { if ( !-e $pod_file ) { print qq{No file $pod_file$NL}; next; } my ( $name, $path, $suffix ) = fileparse( $pod_file, @suffixes ); convert_pod( $pod_file, qq{$name.html} ); } exit 0; } # Generate a hash (in %pod_convert) of qualifying .pm, .pl and .pod files # and keep up-to-date the hash of the HTML file state (%html_track) if_verbose(qq{Indexing .pm, .pl and .pod$NL$NL}); find( \&list_pods, $sourcedir, @addpods ); # Update the %index_track hash taking into account that there may be more # than one path that maps to a particular name. This is used to find # the HTML for linking. if_verbose(qq{${NL}Processing HTML paths$NL$NL}); foreach my $podfile ( values %pod_convert ) { # Extract the name of the POD my $htmlfile = $html_track_ref->{$podfile}; # "." marks the beginning of the suffix my ($tag) = $podfile =~ m{$prefixre(.*)\.}mx; if ( not defined $tag ) { # Match didn't work. Try Plan B. my ( $hf, $ht ) = @{$htmlfile}; print qq{Tag match failure for$NL $hf$NL}; my ( $name, $path, $suffix ) = fileparse( $hf, '\.html' ); $tag = $name; } # PODs in the pod/ directory of the standard distribution are referred # to without the pod:: prefix, so let's just dump it. $tag =~ s{^pod/}{}mx; # Convert the POD path (Foo/Bar) to a POD name (Foo::Bar) $tag =~ s{/}{::}mxg; if_verbose(qq{$podfile =>$NL $htmlfile->[0]$NL}); # Insert the new reference into the %index_track entry. $index_track_ref->{$tag} = insert_latest( $index_track_ref->{$tag}, $htmlfile ); } if_verbose(qq{${NL}Generating HTML$NL$NL}); foreach my $html_file ( keys %pod_convert ) { convert_pod( $pod_convert{$html_file}, $html_file ); } # Make an index of the HTML files build_index(); # Save data do_store(); exit 0; __DATA__ BODY { background: white; color: black; font-family: arial,sans-serif; margin: 0; padding: 1ex; } TABLE { border-collapse: collapse; border-spacing: 0; border-width: 0; color: inherit; } IMG { border: 0; } FORM { margin: 0; } input { margin: 2px; } A.fred { text-decoration: none; } A:link, A:visited { background: transparent; color: #006699; } TD { margin: 0; padding: 0; } DIV { border-width: 0; } DT { margin-top: 1em; } TH { background: #bbbbbb; color: inherit; padding: 0.4ex 1ex; text-align: left; } TH A:link, TH A:visited { background: transparent; color: black; } A.m:link, A.m:visited { background: #006699; color: white; font: bold 10pt Arial,Helvetica,sans-serif; text-decoration: none; } A.o:link, A.o:visited { background: #006699; color: #ccffcc; font: bold 10pt Arial,Helvetica,sans-serif; text-decoration: none; } A.o:hover { background: transparent; color: #ff6600; text-decoration: underline; } A.m:hover { background: transparent; color: #ff6600; text-decoration: underline; } table.dlsip { background: #dddddd; border: 0.4ex solid #dddddd; } .pod PRE { background: #eeeeee; border: 1px solid #888888; color: black; padding: 1em; white-space: pre; } .HF { background: #eeeeee; border: 1px solid #888888; color: black; margin: 1ex 0; padding: 0.5ex 1ex; } .pod H1 { background: transparent; color: #006699; font-size: large; } .pod H2 { background: transparent; color: #006699; font-size: medium; } .pod IMG { vertical-align: top; } .pod .toc A { text-decoration: none; } .pod .toc LI { line-height: 1.2em; list-style-type: none; } __END__ =pod =head1 NAME pod2indexed_html - Convert POD files to HTML, create an index page =head1 VERSION This documentation refers to pod2indexed_html version 0.6.0 =head1 USAGE pod2indexed_html [-dumpdb] [-help] [-scratch] [-verbose] [POD file ...] Use pod2indexed_html with no arguments if you want to index all your installed modules, or just list a few modules to see how it works. =head1 REQUIRED ARGUMENTS There are none. However, consult the "Static configuration" section at the beginning of the script to make sure that you like the choices. =head1 OPTIONS -dumpdb Dump .html_track, .index_track and .nopod_track, then exit. This also provides a list of all links having the same tag. -help Print this and exit. -scratch Rebuild the persistent files, which re-creates all HTML. -verbose Babble. =head1 DESCRIPTION pod2indexed_html locates all the PODs in your distribution, converts them to HTML and makes an index page. Links are rendered so as to refer to the appropriate pages. The principal advantage of pod2indexed_html is that it uses a persistent database of module creation times so that once its been run for the first time, subsequent executions are relatively quick, depending of course on what has been changed. pod2indexed_html notices both new modules and updates. The generated HTML index is flat, organized to look like a module hierarchy. For example, F goes to F, and is indexed under F. HTML display is controled by a stylesheet, F<.doc.css> in the document root directory. =head1 ENVIRONMENT $ENV{HOME} is used in debug mode. =head1 DEPENDENCIES This script requires the following modules: L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L It also requires Perl 5.8.0, but should run under earlier version with only minor modifications. Required modules willing, of course. =head1 CONFIGURATION AND ENVIRONMENT The persistent user environment, configured in the script. =over =item @addpods Defaults to F. This is a list of paths to search for POD files, in addition to @INC. =item $CPAN Defaults to "search.cpan.org". This is the URL to be used to search for modules that are mentioned in a POD, but have not been installed locally. =item $sourcedir Defaults to the shortest path in @INC. Usually, that's F =item $targetdir Defaults to F =item $user Mailto for the creator of the index page. Defaults to "root@localhost". =item $title Defaults to "Perl Documentation" =back =head1 DIAGNOSTICS =over 4 =item C<-verbose> This will produce all sorts of (allegedly) helpful info. =item Tag references more than one POD. Somtimes, one index entry will map to more than one HTML file. For example, both F and F would be pointed to by F. Obviously, this is not going to work very well. The situation is resolved by choosing the most recently modified POD file. To see what's going on, run with -dumpdb and look for the section titled "Multiple index references" at the end. =item Unable to open F - I Failure to open (or create) the database files. =item Tag match failure for F Failure of the regular expression matching that strips @INC paths. =item Can't combine -dumpdb and -scratch. Don't combine C<-dumpdb> and C<-scratch>, as this will delete the database before dumping it. =item Pod::HtmlEasy asked to process link of type I, but that's not supported. HTML conversion has missed a particular link type. Supported types are: URL, manpage, URI. =back =head1 FILES Assuming the you are using the default target directory. =over 4 =item F Tracks the HTML file by the corresponding POD (.pm or .pod) file. /usr/local/doc/HTML/Perl/.html_track /usr/lib/perl5/5.8.8/AnyDBM_File.pm => /usr/local/doc/HTML/Perl/5.8.8/AnyDBM_File.html Sun Jun 4 16:45:28 2006 /usr/lib/perl5/5.8.8/Attribute/Handlers.pm => /usr/local/doc/HTML/Perl/5.8.8/Attribute/Handlers.html Sun Jun 4 16:45:28 2006 /usr/lib/perl5/5.8.8/AutoLoader.pm => /usr/local/doc/HTML/Perl/5.8.8/AutoLoader.html Sun Jun 4 16:45:28 2006 ... =item F Tracks the HTML index entry by the file tag and HTML file. /usr/local/doc/HTML/Perl/.index_track APR => /usr/local/doc/HTML/Perl/vendor_perl/5.8.8/i386-linux-thread-multi/APR.html Sat Feb 11 23:29:50 2006 APR::Base64 => /usr/local/doc/HTML/Perl/vendor_perl/5.8.8/i386-linux-thread-multi/APR/Base64.html Sat Feb 11 23:29:57 2006 APR::Brigade => /usr/local/doc/HTML/Perl/vendor_perl/5.8.8/i386-linux-thread-multi/APR/Brigade.html Sat Feb 11 23:29:34 2006 ... There are some entries that point to multiple HTML files (found at the end out the output), for example: Archive::Extract => /usr/local/doc/HTML/Perl/vendor_perl/5.8.8/Archive/Extract.html Thu Jan 19 04:53:02 2006 /usr/local/doc/HTML/Perl/site_perl/5.8.8/Archive/Extract.html Thu Jan 19 04:53:02 2006 ... =item F Tracks those .pm files that were not found to have POD. /usr/local/doc/HTML/Perl/.nopod_track /usr/lib/perl5/5.8.8/CGI/eg/make_links.pl => Sun Jun 4 16:45:28 2006 /usr/lib/perl5/5.8.8/CPAN/Config.pm => Mon Jun 19 17:02:39 2006 ... =item F The style sheet that's generated to go with the HTML. The source for this file is found in the script, at the end of the file. =item F The "back to the top" arrow gliph. The source for this file is found in the script. =back =head1 INCOMPATIBILITIES None known. =head1 BUGS AND LIMITATIONS None known. =head1 EXIT STATUS 0 on success, 1 on failure. =head1 AUTHOR Geoffrey Leach =head1 LICENSE AND COPYRIGHT Copyright 2006 by Geoffrey Leach This script is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SCRIPT CATEGORIES CPAN/Administrative =cut