#!/usr/local/bin/perl
# Julius C. Duque
#
# PAwH - Paragraph Adjuster with Hyphenation.
# Reads blank-line-separated paragraphs from the files named on the
# command line (or from standard input) and prints them re-flowed to a
# given width: left-justified, right-justified, centered, or justified on
# both margins, optionally hyphenating words that do not fit.

use diagnostics;
use strict;
use warnings;
use Getopt::Long;
use TeX::Hyphen;

my ($width, $hyphenate, $left, $centered, $right, $both);
my ($indent, $newline);

# The documented switch is --hyphenate (abbreviated -h).  "help" is kept
# only as a legacy alias: earlier versions registered the switch under
# that name, so existing invocations keep working.
GetOptions(
    "width=i"        => \$width,
    "hyphenate|help" => \$hyphenate,
    "left"           => \$left,
    "centered"       => \$centered,
    "right"          => \$right,
    "both"           => \$both,
    "indent:i"       => \$indent,
    "newline"        => \$newline,
);

my $hyp = TeX::Hyphen->new();

# Splits a word into leading non-letters ($1), the hyphenatable core ($2)
# and trailing non-letters ($3).  Must be defined before the main loop
# runs, because printpar() reads it.
my $word_parts_re = qr/^([^a-zA-Z]*)([a-zA-Z-']+)([^a-zA-Z]*)$/;

# A width is mandatory; zero or negative widths can never hold a word
# (a negative one would make printpar() spin forever), so show usage.
syntax() if (!$width or $width < 1);
$indent = 0 if (!$indent);

local $/ = "";    # paragraph mode: one read returns one whole paragraph

while (my $paragraph = <>) {
    my @words = split ' ', $paragraph;
    printpar(@words);
    print "\n" if ($newline);
}

# printpar(@words)
#
# Prints the words of one paragraph as a sequence of output lines no
# wider than $width (the first line is shortened by $indent and preceded
# by that many spaces).  Alignment follows the --centered / --right /
# --both switches; the default is left-justified.  With --hyphenate, a
# word that does not fit is split at a hyphenation point and the
# remainder is carried over to the next line.  Returns nothing useful.
sub printpar {
    my (@par) = @_;
    my $firstline = 0;

    while (@par) {
        $firstline++;
        my ($buffer, $word) = ('', '');
        my ($charcount, $wordlen) = (0, 0);
        my $linewidth = $width;

        if ($firstline == 1) {
            $linewidth -= $indent;
            # An indent >= width leaves no room; allow at least one
            # column so that the line still consumes a word.
            $linewidth = 1 if ($linewidth < 1);
            print " " x $indent;
        }

        # Greedily take words (each followed by one space) until the
        # line is full or the paragraph is exhausted.  The last word
        # taken may overshoot; that is repaired below.
        while (($charcount < $linewidth) and (@par)) {
            $word    = shift @par;
            $wordlen = length($word);
            $buffer .= "$word ";
            $charcount += $wordlen + 1;
        }

        # Drop the trailing space.
        chop $buffer;
        $charcount--;

        # Case 1: the line holds a single word that is wider than the
        # line.  Either split it, or let this one line run over.  (A
        # single word that does fit needs no special treatment and is
        # aligned like any other line.)
        if ($charcount == $wordlen and $wordlen > $linewidth) {
            my ($pos, $pre_word_len) = (0, 0);

            if ($hyphenate and $word =~ $word_parts_re) {
                my ($pre_word, $stripped_word) = ($1, $2);
                $pre_word_len = length($pre_word);
                # Same budget rule as the overflow case below: prefix +
                # fragment + "-" must fit in the available width.
                $pos = hyphenate_word($stripped_word,
                                      $linewidth - $pre_word_len + 1);
            }

            if ($pos) {
                $charcount = $pre_word_len + $pos;
                unshift(@par, substr($word, $charcount));
                $buffer = substr($word, 0, $charcount) . "-";
                $charcount++;
            }
            else {
                # Cannot be split: widen this line to the word so the
                # overflow handling below does not push it back forever.
                $linewidth = $wordlen;
            }
        }

        my $lineout = $buffer;

        # Case 2: several words were taken and the last one overshoots.
        # Keep as much of it as hyphenation allows and return the rest
        # (or the whole word) to the paragraph.
        if ($charcount > $linewidth) {
            my ($pos, $pre_word_len) = (0, 0);

            if ($hyphenate and $word =~ $word_parts_re) {
                my ($pre_word, $stripped_word) = ($1, $2);
                $pre_word_len = length($pre_word);
                my $unfilled = $linewidth - $charcount + $wordlen
                             - $pre_word_len + 1;
                $pos = hyphenate_word($stripped_word, $unfilled);
            }

            $charcount -= $wordlen;    # length of the text before $word

            if ($pos == 0) {
                $charcount--;          # also drop the space before $word
                unshift(@par, $word);
            }
            else {
                unshift(@par, substr($word, $pre_word_len + $pos));
                $charcount += $pre_word_len + $pos;
            }

            $lineout = substr($buffer, 0, $charcount);

            if ($pos) {
                $lineout .= "-";
                $charcount++;
            }
        }

        my $spaces_to_fill = $linewidth - $charcount;

        if ($centered) {
            print " " x int($spaces_to_fill / 2);
        }
        elsif ($right) {
            print " " x $spaces_to_fill;
        }
        elsif ($both) {
            my $tempbuf           = $lineout;
            my $replacements_made = 0;

            # The last line of a paragraph (@par empty) is left ragged.
            if (@par) {
                # Widen one gap at a time.  The string is reversed after
                # every insertion so that extra spaces are added
                # alternately from the left and from the right; $reps is
                # the gap width currently being widened.
                my $reps = 1;
                while (length($tempbuf) < $linewidth) {
                    last if ($tempbuf !~ /\s/);    # one word: no gaps
                    if ($tempbuf =~ s/(\S+ {$reps})(\S+)/$1 $2/) {
                        $replacements_made++;
                        $tempbuf = scalar reverse $tempbuf;
                    }
                    else {
                        $reps++;
                    }
                }
            }

            # An odd number of reversals leaves the text backwards.
            $lineout = ($replacements_made % 2 == 0)
                     ? $tempbuf
                     : scalar reverse $tempbuf;
        }

        print "$lineout\n";
    }
}

# hyphenate_word($tword, $unfilled)
#
# Asks the TeX::Hyphen object for the break positions of $tword and
# returns the largest one that is smaller than $unfilled - 1, or 0 when
# the word has no break point that small.
sub hyphenate_word {
    my ($tword, $unfilled) = @_;

    my @hyphen_places = $hyp->hyphenate($tword);

    foreach my $place (reverse @hyphen_places) {
        return $place if ($place < $unfilled - 1);
    }

    return 0;
}

# syntax()
#
# Prints the usage summary to standard output and exits with status 0.
sub syntax {
    print
        "Usage:\n",
        " $0 --width=n [options] file1 [file2 file3 ...]\n",
        " cat file1 [file2 file3 ...] | $0 --width=n [options]\n\n",
        "Options:\n",
        "--width=n (or -w=n or -w n) Line width is n chars ",
        "long\n",
        "--left (or -l) Left-justified",
        " (default)\n",
        "--right (or -r) Right-justified\n",
        "--centered (or -c) Centered\n",
        "--both (or -b) Both left- and\n",
        " right-justified\n",
        "--indent=n (or -i=n or -i n) Leave n spaces for ",
        "initial\n",
        " indention (defaults ",
        "to 0)\n",
        "--newline (or -n) Output an empty line \n",
        " between ",
        "paragraphs\n",
        "--hyphenate (or -h) Hyphenate word that ",
        "doesn't\n",
        " fit on a line\n";

    exit 0;
}

=head1 NAME

pawh - a small Perl script that reformats lines of ASCII text so that
the resulting lines are justified in any of the following formats:
left-justified (default), right-justified, centered, or both left- and
right-justified.

=head1 README

Paragraph Adjuster with Hyphenation (PAwH) is a small Perl script that
reformats lines of ASCII text so that the resulting lines are justified
in any of the following formats: left-justified (default),
right-justified, centered, or both left- and right-justified.

PAwH has various switches, most are optional, to control its output.
The only mandatory switch is the line width (--width).

For PAwH to work properly, input paragraphs must be separated by blank
lines.

PAwH is also capable of hyphenating a word that cannot be accommodated
on a line.

=head1 DESCRIPTION

Paragraph Adjuster with Hyphenation (PAwH) is a small Perl script that
reformats lines of ASCII text so that the resulting lines are justified
in any of the following formats: left-justified (default),
right-justified, centered, or both left- and right-justified.

PAwH has various switches, most are optional, to control its output.
The only mandatory switch is the line width (--width).

For PAwH to work properly, input paragraphs must be separated by blank
lines.

=head1 USAGE

You can use PAwH in either of two ways:

    ./pawh.pl --width=n [options] file1 [file2 file3 ...]

or

    cat file1 [file2 file3 ...] | ./pawh.pl --width=n [options]

where file1, file2, file3, and so on, are the files to be reformatted.
There's only one output, though.

=head1 SWITCHES

The available switches are:

    --width=n (or -w=n or -w n)    Line width is n chars long
    --left (or -l)                 Output is left-justified (default)
    --right (or -r)                Output is right-justified
    --centered (or -c)             Output is centered
    --both (or -b)                 Output is both left- and right-justified
    --indent=n (or -i=n or -i n)   Leave n spaces for initial indention
                                   (defaults to 0)
    --newline (or -n)              Insert blank lines between paragraphs
    --hyphenate (or -h)            Hyphenate word that doesn't fit on a line

=head1 EXAMPLES

The following command reformats the file, LICENSE, so that the line
width is at most 70 characters, both left- and right-justified, with
blank lines inserted between consecutive paragraphs, and words that
can't fit at the end of lines are hyphenated.

    pawh.pl --width=70 --both --newline --hyphenate LICENSE

You can also use the shortened version:

    pawh.pl -w=70 -b -n -h LICENSE

If you want to indent each paragraph, just use the --indent switch.
Say, you want to indent the LICENSE file with 4 leading spaces, type:

    pawh.pl --width=70 --both --newline --hyphenate --indent=4 LICENSE

or

    pawh.pl -w=70 -b -n -h -i=4 LICENSE

=head1 GUI VERSION

There is also a Perl/Tk version of pawh.pl, pawh-tk.pl. This Perl/Tk
version is less flexible than the command-line version, though, because
it can only read one file at a time.

=head1 PREREQUISITE

You need Jan Pazdziora's Perl module, TeX::Hyphen, available from the
Comprehensive Perl Archive Network (CPAN), to use the hyphenation
feature.

For Windows users, you can install TeX::Hyphen by following these steps:

    1. Uncompress the TeX::Hyphen module, TeX-Hyphen-0.140.tar.gz.

    2. Descend (cd) into the TeX-Hyphen-0.140/lib and copy the TeX
       directory into the \lib directory of your Perl installation.
       For example, if your Perl binaries are installed on E:\Perl,
       copy the TeX directory into E:\Perl\lib.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2003 Julius C. Duque. Please read contact.html that comes
with this distribution for details on how to contact the author.

This library is free software; you can redistribute it and/or modify it
under the same terms as the GNU General Public License.

=pod OSNAMES

any

=pod SCRIPT CATEGORIES

CPAN/Administrative
Fun/Educational

=cut