# -*-cperl-*-
# Use perl. It's faster to use something like
#     #!/usr/bin/perl
# but an absolute path is less flexible.
eval 'exec perl -S $0 "$@"'
    if 0;
eval 'exec perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;
#
# text2texi - converts simple textfiles to GNU texinfo-files
#
# Documentation is embedded in POD-format after the __END__ cut mark at the
# bottom of this script.
#
# Author:   Andreas Spindler
# Location: www.prismtk.de
# License:  GPL (see www.gnu.org)
# History:  x.xxx lost in the dark of history--probably not used by anyone
#           1.02f bugfixes: lists, tables, character styles
#
# Copyright (C) 1999-2001 Andreas Spindler.
#
# $Writestamp: 2001-03-03 21:16:48 Administrator$
#
package text2texi;
use 5.004;
use strict;

# Package globals shared by the helper subs and the script body, plus the
# regex fragments (kept as strings; they are interpolated into patterns).
BEGIN {
    use vars qw/$VERSION $COPYRIGHT $infile $outfile $first %values $i $j/;
    use vars qw/$wsp/;
    use vars qw/$in_texinfo $in_table/;
    use vars qw/$in_example $in_footnote/;
    use vars qw/@item_list @item_indent_list/;
    use vars qw/$RX_safe_wsp $RX_blank_line $RX_table_col_split
                $RX_table_heading $RX_table_heading_char/;

    $VERSION   = '1.02f';
    $COPYRIGHT = 'Copyright (C) 2001 A. Spindler';

    $RX_safe_wsp           = '[ \r\t]';       # don't match '\n' as \s does
    $RX_blank_line         = '^\s*$';
    $RX_table_heading      = '[\w\d]+';       # valid name for heading in tables
    $RX_table_col_split    = '[|;]{1}';       # how to split table columns
    $RX_table_heading_char = '[|;\w\d \t]';
}

# NOTE: this brace opens a bare block around the helper subs and the script
# body. Its closing brace is not part of this section of the file.
{

# local_date()
#   Returns today's local date as the string "MONTH, DAY YEAR" with an
#   English month name, e.g. "March, 3 2001".
sub local_date {
    my ($mday, $mon, $year) = (localtime(time))[3, 4, 5];
    my @MONTH_NAMES_EN = ('January', 'February', 'March', 'April', 'May',
                          'June', 'July', 'August', 'September', 'October',
                          'November', 'December');
    # localtime() reports the year as an offset from 1900 (2001 => 101), so
    # no century window must be applied.
    $year += 1900;
    return $MONTH_NAMES_EN[$mon].', '.$mday.' '.$year; # US style
}

# error(LIST)
#   Dies with "ERROR: " followed by the space-joined LIST and the current
#   value of $!.
sub error {
    die 'ERROR: '.join(' ',@_).": $!"
}

# lastindent()
#   Returns the indentation recorded for the innermost open list, i.e. the
#   top of @item_indent_list (undef when no list is open).
sub lastindent {
    $item_indent_list[$#item_indent_list]
}

# pushitem(ENV, INDENT [, ARG])
#   Records ENV and INDENT on @item_list / @item_indent_list and writes the
#   opening line to OUT: "@ENV @ARG" when ARG is defined, "@ENV @" otherwise.
sub pushitem {
    my ($s, $indent, $x) = @_;
    push @item_list, $s;
    push @item_indent_list, $indent;
    if (defined $x) {
        print OUT "\@$s \@$x\n";
    }
    else {
        print OUT "\@$s \@\n";
    }
}

# popitem()
#   Closes the innermost open environment: drops its indent, pops its name
#   from @item_list and writes "@end NAME" to OUT. Does nothing when no
#   environment is open.
sub popitem {
    return unless @item_list;
    pop @item_indent_list;
    print OUT "\@end " . pop(@item_list) . "\n";
}

###########################################################################
# Start script                                                            #
###########################################################################

print "text2texi v$VERSION $COPYRIGHT\n";

($infile,$outfile) = @ARGV;
# Without an input file the open() below would only report a confusing
# "can't open ''"; say what is expected instead.
die "usage: text2texi TEXT-FILENAME [OUTPUT-TEXINFO-FILENAME]\n"
    unless defined $infile && length $infile;
$outfile = $infile.'.txi' unless $outfile;
# Two-argument open is kept on purpose: the script declares "use 5.004",
# which predates the three-argument form.
open(IN,"<$infile") || error("can't open '$infile'");
open(OUT,">$outfile") || error("can't open '$outfile'");
print OUT "\\input texinfo\n";

###########################################################################
# Process simplified texinfo header (if one).                             #
###########################################################################

# Read the first line into $_ explicitly. A bare "<IN> =~ /.../" would read
# the line without storing it, so the "print OUT" in the fall-through branch
# would have nothing to print and the line would be lost.
$_ = <IN>;
if (defined $_ && /^\@text2texi/) {
    while (<IN>) {
        $values{title}             = $1 if /\@set title (.+)$/;
        # "subtitle"/"author" without a number are stored in slot 0 (they
        # are written back out as subtitle1/author1); numbered forms 1..9 go
        # to slot N-1.
        $values{subtitle}->[0]     = $1 if /\@set subtitle (.+)$/;
        $values{subtitle}->[$1-1]  = $2 if /\@set subtitle([1-9]) (.+)$/;
        $values{author}->[0]       = $1 if /\@set author (.+)$/;
        $values{author}->[$1-1]    = $2 if /\@set author([1-9]) (.+)$/;
        $values{copyrightyears}    = $1 if /\@set copyrightyears (.+)$/;
        $values{copyrightowner}    = $1 if /\@set copyrightowner (.+)$/;
        $values{edition}           = $1 if /\@set edition (.+)$/;
        $values{date}              = $1 if /\@set date (.+)$/;
        last if /\@end/;
    }
}
elsif (defined $_) {
    # No simplified header: pass the first line through unchanged. It ends
    # up ahead of the @setfilename line written below.
    print OUT;
}

# Use default values for Texinfo variables.
# Subtitles and authors are kept as arrays in $values{subtitle} and
# $values{author}; slot N-1 holds subtitleN / authorN.
$values{subtitle} = [] unless ref $values{subtitle};
$values{author}   = [] unless ref $values{author};
{
    # Add a "Version ..." subtitle as subtitle2, or as subtitle3 when the
    # user already supplied a subtitle2. A slot the user filled is never
    # overwritten.
    my $subtitles = $values{subtitle};
    my $slot = (defined $subtitles->[1] && length $subtitles->[1]) ? 2 : 1;
    $subtitles->[$slot] = 'Version @value{edition}'
        unless defined $subtitles->[$slot] && length $subtitles->[$slot];
}
$values{title}          = $infile               unless exists $values{title};
$values{copyrightowner} = $values{author}->[0]  unless exists $values{copyrightowner};
# The date default must be guarded by the date key itself; guarding it with
# copyrightowner (which was just set above) means it is never applied.
$values{date}           = local_date()          unless exists $values{date};
$values{author}->[1]    = $values{date}
    unless defined $values{author}->[1] && length $values{author}->[1];

# Values the input never set are written as empty strings.
foreach my $key (qw/copyrightyears copyrightowner edition/) {
    $values{$key} = '' unless defined $values{$key};
}

# Write TexInfo header.
print OUT "\@c generated by text2texi $VERSION $COPYRIGHT\n",
          "\@c maintained at http://www.prismtk.de\n",
          "\@c %**start of header\n",
          "\@setfilename $infile\n",
          "\@settitle $values{title}\n",
          "\@set title $values{title}\n",
          "\@set copyrightyears $values{copyrightyears}\n",
          "\@set copyrightowner $values{copyrightowner}\n",
          "\@set edition $values{edition}\n",
          "\@set date $values{date}\n",
          "\@defindex au\n",                    # Author Index
          "\@defindex hy\n";                    # Hyperlinks Index

# Emit "@set subtitleN ..." for every subtitle, then "@set authorN ..." for
# every author. Slots that were never filled are skipped.
foreach my $kind (qw/subtitle author/) {
    my $list = $values{$kind};
    foreach my $n (0 .. $#{$list}) {
        next unless defined $list->[$n];
        print OUT "\@set $kind" . ($n + 1) . " $list->[$n]\n";
    }
}

print OUT "\@c %**end of header\n",
          "\@setchapternewpage odd\n",
          "\@titlepage\n",
          "\@sp 10\n",
          "\@title $values{title}\n";
foreach my $subtitle (grep { defined } @{$values{subtitle}}) {
    print OUT "\@subtitle $subtitle\n";
}
foreach my $author (grep { defined } @{$values{author}}) {
    print OUT "\@author $author\n";
}
print OUT "\@page\n",
          "\@vskip 0pt plus 1filll\n",
          "Copyright \@copyright{} $values{copyrightyears} $values{copyrightowner}.\n",
          "\@end titlepage\n";

# Enter main loop.
my($has_authors,$has_concepts,$has_keywords,$has_types,$has_links); while() { if( /\@texinfo/ ) { $in_texinfo = !$in_texinfo; next; } unless( $in_texinfo ) { my $new_chapter; $wsp = undef; study; ############################################################################ # Chapters and sections. # ############################################################################ unless( s/^\*\*\*\*/\@subsubsection / ) { unless( s/^\*\*\*/\@subsection / ) { unless( s/^\*\*/\@section / ) { unless( s/^\*/\@chapter / ) { unless( $first ) { $first = 1; } } else { $new_chapter = 1 } } else { $new_chapter = 1 } } else { $new_chapter = 1 } } else { $new_chapter = 1 } if( $new_chapter ) { print OUT "\@end example\n" if $in_example; popitem while @item_list; } ############################################################################ # Enumeration (enumerated lists) and itemization (bulleted lists). # ############################################################################ unless( $in_example || $in_table ) { # Implicit (indented block that starts with one or more '-', # '+', '*' or numbers, followed by some non-whitespaces). If # the line starts with a number, that number must be followed # by a whitespace, '.' or ')' character. my $itemize; # Set $itemize true if new list item; load line indentation to # $wsp. if( /^(\s*[0-9]+[\.\-\)]{1,2}\s+)/ || /^(\s*[\-\*\+]{1,2}\s+)/ ) { $itemize = 1; $wsp = length $1; } else { /^(\s*)/; $wsp = length $1; } unless( /$RX_blank_line/ ) { # While indentation of this line is *less* than last indent # (number at top of @item_indent_list), or if the line is # not indented ($wsp==0), close item/table/enum-lists. 
popitem while @item_list && ($wsp==0 || $wsp < lastindent); # Open a new item list or continue the current one if( @item_list || $itemize ) { if( s/^\s*([\-\+\*\^]+)\s*/\@item / ) { if( $itemize ) { my $c = substr($1,0,1); if( @item_list==0 || $wsp > lastindent ) { pushitem 'itemize', $wsp, "bullet" if $c eq '*'; pushitem 'itemize', $wsp, "minus" if $c eq '-' || $c eq '+'; } } } elsif( s/^\s*[0-9]+[\.\-]{1}\s+/\@item / ) { if( @item_list==0 || $wsp > lastindent ) { pushitem 'enumerate', $wsp; } } } if( @item_list ) { if( $wsp > lastindent ) { # No digit or itemizing character, but indentation has # increased. This is monospaced text. print OUT "\@example\n"; $in_example = 1; } else { # Remove all leading whitespace to not interfere with # texi2html--this script will format lines with leading # whitespaces as sample text ((monospaced font). s/^[ \t]+//; } } } } ############################################################################ # Monospaced text. # ############################################################################ my $RX_heading_def = $RX_table_heading.$RX_safe_wsp.'*'.$RX_table_col_split.$RX_safe_wsp.'*'; my $is_table_heading = /^$RX_heading_def$RX_table_heading_char*$/; unless( $in_table ) { if(!$in_example && !@item_list && /^(\s+)[^\s]+/ ) { print OUT "\@example\n"; $in_example = 1; } elsif( $in_example && /$RX_blank_line/ ) { print OUT "\@end example\n"; $in_example = 0; next; # ignore blank lines in examples... } elsif( $in_example && /^[^\s]+/ ) { print OUT "\@end example\n"; $in_example = 0; } if( $in_example ) { # Escape all '@' characters (required by texinfo). s/@/@@/g; } } ############################################################################ # Regular text. # ############################################################################ unless( $in_example ) { ######## # Lines in the input text that serve as horizontal rulers. # if( /^[\-\+\*\#\=\~]{3,}\s*$/ ) { $_ .= "\n"; print OUT "\n"; } else { ######## # Hyperlinks. 
# # No blanks are # permitted before or after the '@@'. First # parse 'TEXT @@' where TEXT itself serves as LINK. Second # parse 'TEXT @@ LINK'. # my $op = '\@\@'; my $chs_term = '([\s\-\.:]{1,1})'; # character after LINK or @@ my $chs_nw = '([^@"\s]+)'; # character sequence, no whitespace my $chs_w = '"([^@"]+)"'; # character sequence, maybe whitespace while( # TEXT @@ s/$chs_nw$op$chs_term/\@uref\{$1,$1\}$2/g || s/$chs_nw$op$/\@uref\{$1,$1\}/g || # "TEXT" @@ s/$chs_w$op$chs_term/\@uref\{$1,$1\}$2/g || s/$chs_w$op$/\@uref\{$1,$1\}/g ) { $has_links = 1; print OUT "\@hyindex $1 -- \@emph{\@url\{$1\}}\n"; } while( # TEXT @@ LINK s/$chs_nw$op$chs_nw$chs_term/\@uref\{$2,$1\}$3/g || s/$chs_nw$op$chs_nw$/\@uref\{$2,$1\}/g || # "TEXT" @@ LINK s/$chs_w$op$chs_nw$chs_term/\@uref\{$2,$1\}$3/g || s/$chs_w$op$chs_nw$/\@uref\{$2,$1\}/g || # TEXT @@ "LINK" s/$chs_nw$op$chs_w$chs_term/\@uref\{$2,$1\}$3/g || s/$chs_nw$op$chs_w$/\@uref\{$2,$1\}/g || # "TEXT" @@ "LINK" s/$chs_w$op$chs_w$chs_term/\@uref\{$2,$1\}$3/g || s/$chs_w$op$chs_w$/\@uref\{$2,$1\}/g ) { $has_links = 1; print OUT "\@hyindex $1 -- \@emph{\@url\{$2\}}\n"; } ######## # Do fuzzy things with well-known text sequences and tokens # such as _CPPUNWND, #define, #endif etc. # fuzzytag( \$_ ); ######## # Strong, emphasized and code text # texitag( \$_, '\*', '@strong' ); texitag( \$_, '#', '@strong' ); texitag( \$_, '>', '@emph' ); texitag( \$_, '_', '@emph' ); texitag( \$_, ':', '@code' ); ######## # Handle footnotes. # if(!$in_footnote and s/\^\^/\@footnote\{/ ) { $in_footnote = 1; } if( $in_footnote and s/\^\^/\}/ ) { $in_footnote = 0; } } } ############################################################################ # Insert Texinfo tags for multicolumn Tables. # # Begin a table if the lines is indended and contains at least one # column delimiter ($is_table_heading is true). 
############################################################################ unless( $in_example ) { unless( $in_table ) { if( $is_table_heading ) { $in_table = 1; my @cfrac = ( .25, .25, .40 ); # list of floats; # sum must be <= 1.0 # (100%) #print OUT '@paragraphindent 4'; pushitem 'multitable', undef, "columnfractions " . join( ' ', @cfrac ); s/(\w+)/\@strong{$1}/g; } } elsif( /$RX_blank_line/ ) { # The first blank line closes the table. $in_table = 0; popitem; #print OUT '@paragraphindent 0'; } if( $in_table ) { # Replace all columns delimiters with '@tab' keyword. # WARNING: don't use '\s' because that strips '\n'. s/[ \t]*$RX_table_col_split[ \t]*/ \@tab /g; # Hack: indent table with empty cols at left side; looks # better if texi2html is used on this output. my $extra; # = "\@tab " x 4; $_ = "\@item $extra $_"; } } ############################################################################ # Concept, author, hyperlinks, keyword/variable/function indices. # ############################################################################ while( s/C\{([^\}]+)\}/$1/ ) { # concepts print OUT "\@cindex $1\n"; $has_concepts = 1; } while( s/AU\{([^\}]+)\}/$1/ ) { # authors print OUT "\@auindex $1\n"; $has_authors = 1; } while( s/HY\{([^\}]+)\}/$1/ ) { # hyperlinks print OUT "\@hyindex $1 -- \@emph{\@url\{$1\}}\n"; $has_links = 1; } while( s/[KVFT]{1}\{([^\}]+)\}/$1/ ) { # keywords print OUT "\@kyindex $1\n"; $has_keywords = 1; } ###### # Finished. Write out current line of text... # unless( $in_example ) { # Make sure that at least one whitespace is between consecutive # texinfo-tags. s/(@(sub){0,2}section)@/$1 @/; # Remove escaped labels. s/\\\#/\#/g; s/\\\*/\*/g; s/\\\:/\:/g; s/\\\_/\_/g; s/\\\>/\>/g; s/\\\' ? 
'<' : $o;
    # (Tail of texitag; the start of this sub lies before this span and is
    # left untouched.)
    $c = "([^\\s]{1})([^$o]*?|[^\\\\]{1})$c";
    $$ln =~ s/^$o$c(\W)/$tx\{$1$2\}$3/;               # at beginning of line
    $$ln =~ s/$o$c$/$tx\{$1$2\}/;                     # at end of line
    $$ln =~ s/([^\\]{1})$o$c(\W)/$1$tx\{$2$3\}$4/g;   # within line
    undef;
}

sub fuzzytag {
    # fuzzytag(LINE_REF)
    #
    # Do fuzzy things with well-known text-sequences in the string that
    # LINE_REF points to, such as cpp-directives (#if, #define etc.). The
    # string is modified in place. Must be called before texitag().
    #
    # Dies when called without an argument.
    my $ln = shift or die;
    my @macros        = ( '_CPPUNWIND', '_M_IX86', '__cplusplus', '_MSC_VER',
                          '_MFC_VER' );
    my @ansiC         = ( 'const', 'volatile', 'errno' );
    my @ansiCPP       = ( 'mutable' );
    my @cppdirectives = ( 'ifndef', 'ifdef', 'if', 'else', 'elif', 'pragma',
                          'define', 'endif', 'error' );

    # Monospace standard C preprocessor directives. The '#' is written as
    # '\#' inside the @code{} so that it carries a backslash escape.
    #
    # One substitution per directive covers start of line, end of line and
    # mid-line. It replaces four sequential rules whose start-of-line forms
    # used the wrong capture numbers (dropping the directive name) and whose
    # mid-line form could match text a previous rule had just inserted,
    # nesting @code{@code{...}}.
    foreach my $directive ( @cppdirectives ) {
        $$ln =~ s/(^|\W)#\s*(\Q$directive\E)(?!\w)/$1\@code{\\#$2}/g;
    }

    # Monospace computer language constants (whole words only).
    foreach my $word ( @macros, @ansiC, @ansiCPP ) {
        $$ln =~ s/(^|\W)\Q$word\E(?!\w)/$1\@code{$word}/g;
    }

    # Monospaces program switches/options
    $$ln =~ s/^(-+[A-Za-z0-9_\-=\[\]]+)/\@code{$1}/;        # begin of line
    # $1 is the whitespace before the option, $2 the option itself.
    $$ln =~ s/(\s)(-+[A-Za-z0-9_\-=\[\]]+)/$1\@code{$2}/g;  # somewhere in line
    return;
}

__END__

=head1 NAME

text2texi - converts simple textfiles to GNU texinfo-files

=head2 SYNOPSIS

    text2texi TEXT-FILENAME OUTPUT-TEXINFO-FILENAME

=head2 DESCRIPTION

text2texi is a line-oriented Perl5 script that converts an ordinary textfile
into a GNU texinfo input file. A sample is available L. You'll need none of
the L to use this script.

text2texi will start the output with the line C<\input texinfo> and terminate
it gracefully with a C<@bye> line.
Between these texinfo marks, while applying character- and paragraph-oriented conversions, text2texi basically inserts your text literally. Therefore you can embed as many private texinfo tags as required (see L<"EMBEDDING NATIVE TEXINFO-CODE">). =head2 SIMPLIFIED TEXINFO HEADER text2texi optionally parses a simplified texinfo header. This header is a list of texinfo C<@set> statements defined between C<@text2texi> ... C<@end> tags on separate lines. Each of the keywords C<@text2texi>, C<@end> and C<@set> must be placed in column 1 (no preceding whitespace allowed). Example, long header: @text2texi -*-mode:outline; fill-column:100-*- @set title Syntax and Semantic Aspects on C and C++ @set subtitle1 Translation Phases / Language Elements / Object Model @set subtitle2 Version @value{edition} @set edition 0.1b @set date $Writestamp: 2001-02-26 18:42:59 e685$ @set author1 Andreas Spindler @set author2 Various @set copyrightyears 1998-2001 @set copyrightowner @value{author1} @end Your text goes here... Example, short header: @text2texi -*- mode:doc; fill-column:100-*- @set title Syntax and Semantic Aspects on C and C++ @set subtitle Translation Phases / Language Elements / Object Model @set edition 0.1b @set author Andreas Spindler @set copyrightyears 1998-2001 @end Your text goes here... In the short version, text2texi implicitly parses C<subtitle> as C<subtitle1> and C<author> as C<author1>. Furthermore, it sets C<subtitle2> to "Version @value{edition}", C<date> and C<author2> to today's date (in US style), and C<copyrightowner> to "@value{author1}". =head2 MARKING UP CHARACTERS, WORDS AND PHRASES Format *text* and #text# as C<@strong{text}> (usually bold). Format >text< and _text_ as C<@emph{text}> (emphasized, usually italics). Format :text: as C<@code{text}> (source code).
=head2 PARAGRAPH-ORIENTED CONVERSIONS =head3 Indices The following syntax rules create index entries: entry: concept-key '{' text-spec '}' concept-key: 'C' // concept index | 'K' // key index | 'V' // variables index | 'F' // functions index | 'T' // data types index | 'AU' // author index | 'HY' // hyperlink index text-spec: non-whitespace-character-sequence '"' any-character-sequence '"' Example A: define the concepts of I<name lookup>, I<ordinary name lookup> and I<argument-dependent name lookup>. Searching for language objects in the syntax tree by object name (identifier) is called C{name lookup}. C and C++ use two algorithms: C{ordinary name lookup} (OL) or C{argument-dependent name lookup} (ADL). ADL is often called C{koenig lookup}. Example B: Define the concept of C<foo> and its author. The concept of C{foo} in software design is mysterious. As far as I know, the word is taken from the spell "fee, fie, foo" in the book "Alice in Wonderland", written by AU{Lewis Caroll}. =head3 Hyperlinks B hyperlink: text-spec '@@' text-spec text-spec '@@' text-spec: non-whitespace-character-sequence '"' any-character-sequence '"' The I<text-spec> on the right side of the I<hyperlink> declaration serves as the URL specification, the I<text-spec> on the left as the text that receives a link (TEXT). Blanks are not allowed before and after I<@@>. The whole definition must fit into one line. B If URL is omitted, TEXT serves as both TEXT and URL. Example: http://www.cpan.org@@ If C<@@> is preceded by a double-quoted character sequence, the whole sequence is matched as TEXT; otherwise only the characters back to the previous whitespace are matched. Example: "Fee Fie Foo"@@"http://www.prismtk.de" If TEXT had not been enclosed in double-quotes, text2texi would have hyperlinked just "Foo" instead of "Fee Fie Foo". You are free to use native texinfo-code to create links. Example: C<@uref{LINK,TEXT}> text2texi puts all hyperlinks automatically into the HYPERLINKS INDEX. =head3 Headings Convert lines starting with asterisks in the first column.
'*' => @chapter '**' => @section '***' => @subsection '****' => @subsubsection Background for this notation: the '*' style is used by default in Emacs outline mode. =head3 Monospaced Paragraphs Indented lines outside of L<"Bulleted Lists and Enumerations"> are implicitly placed between texinfo C<@example> and C<@end example> directives. =head3 Footnotes ^^ your text ^^ This definition is converted to the texinfo-code C<@footnote{your text}>. Whitespace after the opening and before the closing I<^^> is ignored. The definition can span multiple lines: ^^ your text ^^ Note that footnotes cannot be nested. The following text defines two footnotes in a row ^^ your text ^^ your other text ^^ now continues ^^ The first footnote defined here is "your text", the other is "now continues". =head3 Bulleted Lists and Enumerations To create bulleted lists just use 0 1 2 3 4 5 6 7 8 9 + * - If :*: is used the list item must be indented; otherwise it is misinterpreted as a heading. Blocks with multiple lines are parsed correctly if lines "hang" on the itemizing character. For example - fee fie foo If "- fee" is at column one, "fie" and "foo" both must be preceded by two blanks. In case the indentation changes or a blank line appears, either the current list is closed or a new one is opened. Between items, blank lines are permitted - fee - fie - foo =head3 Multicolumn Tables To begin a table (using C<@multitable>), a text line must contain a heading name (at column 1) followed by a column delimiter. More heading names and column delimiters are permitted but not required. A heading name is a character sequence made of alphanumeric characters, digits and '_'. No whitespace is permitted within the heading name lexeme. Columns are delimited with C<|> or C<;>. Empty columns can be defined using C<||>, C<;;;> etc. Whitespace before and after the delimiter is ignored. The first blank line closes the table. Each row in the table must appear in exactly one line. Example: . (ordinary text) . .
KEY | FUNCTION | DESCRIPTION :C-x C-f: | :find-file: | Open a file and create a buffer with its contents. :C-x C-c: | :save-buffers-kill-emacs: | Offer to save each modified buffer, then terminate this Emacs process. :C-x C-w: | :write-file: | Write current buffer into a physical file (prompts for filename). | :revert-buffer: | Replace current buffer text with the text of the visited file on disk, undoing all changes. :C-x C-b: | :list-buffers: | Display buffer list in a separate window. (blank line) . . (ordinary text) . =head3 Monospaced Text By default indented text is interpreted as "sample text". This means it renders in a typewriter font (is monospaced) and is not interpreted (text2texi definitions are not recognized). Exception: the first character opens a Bulleted List or Enumeration. =head2 EMBEDDING NATIVE TEXINFO-CODE You enable and disable the "texinfo-fall-through-mode" in text2texi with the directive C<@texinfo> on separate lines. Example: . (ordinary text) . . @texinfo @table @code @item --help @itemx -h Display program usage details. @end table @texinfo . . (ordinary text) . =cut =head2 INSTALLATION You'll just need the script, I<text2texi>, to be placed somewhere in your load path. On DOS/Windows systems, it is convenient to convert it to a .bat-file first: pl2bat text2texi.pl The I<pl2bat> utility is part of the L. =cut =head2 HISTORY The motivation for this script was to have a less cryptic document format than texinfo itself. I wondered how anyone was able to edit big files in texinfo, since at least 30% of the information you are looking at controls formatting and organization of your document. I found this situation unacceptable. My idea was to use just Emacs' I and I to edit documents. The documents should be convertible to full-featured HTML; therefore the output had to be first compatible with I (not yet TeX), so I could use I. The implementation of I<text2texi> is very straightforward; the compilation happens in just one pass.
[Note: Please make sure that you have had a look at Perl's POD (I) format. POD, together with its rich and growing set of document converters, has a good chance of being very useful to you.] =cut =head2 EXAMPLES See L. =head1 SEE ALSO =over 4 =back =head1 AUTHOR / COPYRIGHT Author: Andreas Spindler (info@prismtk.de). Copyright (c) 1998. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Permission to copy, use, modify, sell and distribute this software is granted provided this copyright notice [and possibly a note that this code was modified] appears in all copies. THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESSED OR IMPLIED WARRANTY, AND WITH NO CLAIM AS TO ITS SUITABILITY FOR ANY PURPOSE. Maintained at http://www.prismtk.de. =cut