package PDFREP; #-----------------------------------------------------------------------------# # # # PDFREP # # # # OVERVIEW # # # # This module is used to create a basic data generated PDF file it's main # # Purpose is to be generic and usable by all perl scripts in an easy manner # # It does create all the indexes, but not the thumbnails # # # # DEVELOPMENT # # # # STARTED 11th May 2000 # # COMPLETED # # # # VERSION 0.002 # # # # WRITTEN BY Trevor Ward # # # # MODIFICATION INDEX # # # # This comments area to include all modifications from Version 1.0 # # The version number to be incremented by .1 for each modification # # # # Date Version By Comments # # # #-----------------------------------------------------------------------------# use strict; use English; #use CGI; #-----------------------------------------------------------------------------# # GLOBAL VARIABLES # # # # The following list details the global variables and the functions they are # # Updated in and used in # # # # $objcount - This is used to store the total amount of objects created # # It is updated in the ??????? function # # It is output in the trailer function # # # # $startxref - This is used to store the cross reference start value # # It is updated in the ????????? function # # It is output in the trailer function # # # # $rc - This is used as the return code to the calling program which allows # # for the checking of print return codes. # # # # %pdoffs - This hash is used to store the byte offset of the new object # # when created for use by the cross reference. This should then # # ba able to create the index afterwards # # # # $offset - This is used to store the current offset value from all the text # # which has been printed to the file # # # # $pagecnt - This is used to store up the total count of new pages within the # # pdf file. # # # # %pageref - This is used to store up the page number reference as the key # # and the object reference of the page. # # # # %fontstr - This is used to store the font internal name and the font's # # physical name for all the fonts defined. # # # # $filetyp - This is used to store the physical location of the PDF data file # # # # $temptyp - This is used to store the physical location of the TMP data file # # # # $fontcnt - This is used to store the total number of fonts for calculation # # # # @pdpageline - This is the variable used to store the lines of data to be # # output as the text within the document # # # # $pditem - This is used within the pagedata sub as a global counter which # # needs retaining # # # # $pdlcnt - This keeps track of how many line of data have to be written in # # the sub pagedata. # # # # $pdlgth - This is used within the pagedata sub as the total length of the # # data passed to the stream part of the pdf file # # # # $lnum - This is used to store the current line number of the page. It # # starts at line 80 top of page and subtracts down. # # # # $lcnt - This is used to check the amount of lines written out to the page # # # #-----------------------------------------------------------------------------# # These variables are initialised within the heading sub routine my ($objcount, $startxref, $rc, %pdoffs, $offset, $pagecnt, %pageref, %fontstr, $filetyp, $temptyp); my ($fontcnt, @pdprintline, $item, $lcnt, $pdlgth); # #my (@pdpageline, $pditem, $pdlcnt, $pdlgth, $lnum, $lcnt); #-----------------------------------------------------------------------------# # SUB HEADING # # # # This receives the file name and directory from the calling program and # # Opens the output file for the first time writing the PDF Header record # # It returns the Message and Status code as with all the functions within # # this package Status code 0 is succesful and 1 is failure. It also # # Initialises all the global variables and counters used # #-----------------------------------------------------------------------------# sub heading { # Receive the passed variables # $callpgm is always PDFREP set by using the Package information # $filenam is the name of the output PDF file required # $filedir is the directory for this file to be created in # @rubbish is a catchall filed used incase additional parameters are entered - this is not used my ($callpgm, $filenam, $filedir, @rubbish) = @_; # Initialise all the global variables %pdoffs = "0"; $offset = "0"; $objcount = "0"; $startxref = "0"; $pagecnt = "0"; %pageref = ''; %fontstr = ''; $fontcnt = '0'; $rc = "0"; # Set the heading text value this will remain constant my $heading = "%PDF-1.0\n"; # Check the passed parameters contain values return false if not my $mess; if (!$filenam) { $mess = "No File Name"; return ('0', $mess); } if (!$filedir) { $mess = "No Directory Details"; return ('0', $mess); } # Create the data file name variable and open the file return false if file open fails # Also create the temporary work file which is used to store the page data $filetyp = $filedir . $filenam . ".pdf"; $temptyp = $filedir . $filenam . ".tmp"; open(PDFFILE, "> $filetyp") || warn return ('0' , "File open failure - $filetyp - $!"); open(TMPFILE, "> $temptyp") || warn return ('0' , "File open failure - $temptyp - $!"); # Write the heading record to the file check the return value $rc = print PDFFILE "$heading"; $offset = $offset + length($heading); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Heading'); } # Call the Catalogue sub which produces the catalogue object $rc = &catalog(); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Catalog'); } # Call the Outline sub which produces the Outlines object $rc = &outlines(); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Outline'); } # Set the object count to two for the first three objects just created $objcount = 2; # Return the succesful message and true value to the called program. return ('1', "PDFREP Heading Succesful"); } #-----------------------------------------------------------------------------# # SUB CATALOG # # # # This sub produces the catalog reference which is used to identify the # # Pages object and the Outlines object Which are also fixed objects numbers. # # The Catalog object number is always 1 # # This sub is called from the heading sub as it is fixed # #-----------------------------------------------------------------------------# sub catalog { my @catline = ''; my $item = ''; # Setup the array of all the data required to produce the catalog object $catline[0] = "1 0 obj\n"; $catline[1] = "<<\n"; $catline[2] = "/Type /Catalog\n"; $catline[3] = "/Pages 3 0 R\n"; $catline[4] = "/Outlines 2 0 R\n"; $catline[5] = ">>\n"; $catline[6] = "endobj\n"; # Set the Offset for this object in the offset hash store $pdoffs{1} = $offset; # Write out the data to the PDF file check the return code and throw error if failure foreach $item (@catline) { $rc = print PDFFILE "$item"; # Calculate the new offset depending on the data passed although the data is fixed this # method uses the actaul characters written $offset = $offset + length($item); if (!$rc) { return 0; } } return 1; } #-----------------------------------------------------------------------------# # SUB OUTLINES # # # # This sub produces the outlines object reference in a fixed format during # # testing at anyway. # # The Outlines object is always number 2 # # This sub is called from the heading sub as it is fixed # #-----------------------------------------------------------------------------# sub outlines { my @outline = ''; my $item = ''; # Setup the data into the array required for the Outlines object $outline[0] = "2 0 obj\n"; $outline[1] = "<<\n"; $outline[2] = "/Type /Outlines\n"; $outline[3] = "/Count 0\n"; $outline[4] = ">>\n"; $outline[5] = "endobj\n"; # Set the offset for this object using the offset hash $pdoffs{2} = $offset; # Write out the data to the PDF file check the return code and throw error if failure foreach $item (@outline) { $rc = print PDFFILE "$item"; # Calculate the new offset depending on the data passed although the data is fixed this # method uses the actaul characters written $offset = $offset + length($item); if (!$rc) { return 0; } } return 1; } #-----------------------------------------------------------------------------# # SUB FONTSET # # # # This sub is where the font will be set during page creation. Hopefully # # this is will sort out all font changes within the text which is to be # # printed. It accepts the font name from the calling program # #-----------------------------------------------------------------------------# sub fontset { # Receive the passed variables # $callpgm is always PDFREP set by using the Package information # $fontnam is the internal name of the font # $fonttyp is the physical font used # @rubbish is a catchall filed used incase additional parameters are entered - this is not used my ($callpgm, $fontnam, $fonttyp, @rubbish) = @_; # Now need to store these values until after the pages have been created into a global hash # storing the font name as the key and adding 1 to the total font counter $fontstr{$fontnam} = $fonttyp; $fontcnt++; # Return a succesful code return ('1', 'PDFREP Font Set Succesful'); } #-----------------------------------------------------------------------------# # SUB PAGEDATA # # # # This sub is where the page data is set it is run after the page head has # # been run and it produces a line of data at a time to enable the page to be # # built as opposed to constructed. It receives various parameters prior to # # the text. # # Type of Info # # np = new page # # nl = new line # # # #-----------------------------------------------------------------------------# sub pagedata { # Receive the passed variables # $callpgm is always PDFREP set by using the Package information # $ltype this is the type of data either new page (np) or new line (nl) # $lcol this is the column offset from the left hand side of the page # $lfont this is the size of the font # $nfont this is the internal name of the font # $ldata this is the actual text data to be used # @rubbish is a catchall filed used incase additional parameters are entered - this is not used my ($callpgm, $ltype, $lcol, $lfont, $nfont, $nextf, $ital, $red, $green, $blue, $ldata, $psize, $porin, @rubbish) = @_; # Keep a check on the line count per page current maximum is 38 if over blow away files return error if ($ltype eq 'nl') { $lcnt = $lcnt - $lfont; $rc = print TMPFILE "$red $green $blue rg $lcol $nextf Td ($ldata) Tj\n"; if (!$rc) { return ('0', 'PDFREP Write TMP File Failure - New Line'); } $rc = print TMPFILE "/$nfont $lfont Tf 1 0 $ital 1 10 $lcnt Tm\n"; if (!$rc) { return ('0', 'PDFREP Write TMP File Failure - New Line'); } if ($lcnt <= 10) { &crashed(); return ('0', 'PDFREP Write Page over Max Lines - Files Deleted'); } } if ($ltype eq 'np') { $lcnt = '760'; $lcnt = '760' if ($psize eq 'LE' && $porin eq 'PO'); $lcnt = '582' if ($psize eq 'LE' && $porin eq 'LA'); $lcnt = '810' if ($psize eq 'A4' && $porin eq 'PO'); $lcnt = '565' if ($psize eq 'A4' && $porin eq 'LA'); $pagecnt++; # After reseting line count and incrementing page count output unique line for identification to tmp file $rc = print TMPFILE "XXXXXXXXXXNEW PAGE - $pagecnt\n"; if (!$rc) { return ('0', 'PDFREP Write TMP File Failure - New Page'); } $rc = print TMPFILE "$red $green $blue rg $lcol $nextf Td($ldata) Tj\n"; if (!$rc) { return ('0', 'PDFREP Write TMP File Failure - New Line'); } $rc = print TMPFILE "/$nfont $lfont Tf 1 0 $ital 1 10 $lcnt Tm\n"; if (!$rc) { return ('0', 'PDFREP Write TMP File Failure - New Line'); } } return ('1', "PDFREP Page Data Succesful"); } #-----------------------------------------------------------------------------# # SUB WRITEPDF # # # # This is the final subroutine called from the caling program. It writes the # # PDF file from all the data input so far with all the references and so on # # It is interesting in the fact that it has to be called but it also ends the # # PDFREP program's output- Don't try adding anymore after this # # ----------------------------------------------------------------------------# sub writepdf { # Lets start by closing and opening the TMPFILE which stores the page data. # To get it back to the first record. my ($pgmname, $psize, $porin) = @_; print TMPFILE "XXXXXXXXXXNEW END - 0\n"; close(TMPFILE) || warn return ('0' , "File Close failure - $temptyp - $!"); open(TMPFILE, "< $temptyp") || warn return ('0' , "File open failure - $temptyp - $!"); # Now it's time to initialise the variables which are used to output the data # @pdprintline - this is used to store the data ready for printing. Local because not needed elsewhere my @pdprintline = ''; my $lcnt = '0'; my $item = ''; my $pobjcnt = '0'; # read first line of page data file and split it down my $firstln = ; chomp $firstln; my ($pt1, $pt2, $pt3, $pt4) = split(/\s/, $firstln); # Write the Pages Header Object Which calculates the total number of objects and the page objects # It works on 2 objects per page and 1 object per font thus needing these object numbers # OK Calculate the page and font object numbers. # Use two hashes to store the data. my %pagenum = ''; my %fontnum = ''; my $tmpcnt = '000'; my $tmpobj = $objcount + 2; while ($pagecnt > $tmpcnt) { $pagenum{$tmpcnt} = $tmpobj; $tmpcnt++; while (length($tmpcnt) < 3) { $tmpcnt = "0" . $tmpcnt; } $tmpobj = $tmpobj + 2; } foreach $item (sort keys(%fontstr)) { $fontnum{$item} = $tmpobj; $tmpobj++; } # Start of new object add object count, should = 3 for this object. # Update Offset for this object. $objcount++; $pdoffs{$objcount} = $offset; $pobjcnt = $objcount; $pdprintline[$lcnt] = "$objcount 0 obj\n"; $lcnt++; $pdprintline[$lcnt] = "<<\n"; $lcnt++; $pdprintline[$lcnt] = "/Type /Pages\n"; $lcnt++; $pdprintline[$lcnt] = "/Count $pagecnt\n"; $lcnt++; # Setup the kids info $pdprintline[$lcnt] = "/Kids ["; foreach $item (sort keys(%pagenum)) { $pdprintline[$lcnt] = $pdprintline[$lcnt] . " $pagenum{$item} 0 R"; } $pdprintline[$lcnt] = $pdprintline[$lcnt] . "]\n"; $lcnt++; $pdprintline[$lcnt] = ">>\n"; $lcnt++; $pdprintline[$lcnt] = "endobj\n"; $lcnt++; # Write the Pages object out foreach $item (@pdprintline) { $rc = print PDFFILE "$item"; $offset = $offset + length($item); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Pages Object'); } } # OK now it's time to produce the page data output which will use the font's defined within the # Page heading and the temporary file to retrieve the actual page data. $tmpcnt = '0'; while ($pagecnt > $tmpcnt) { $objcount++; $pdoffs{$objcount} = $offset; $lcnt = '0'; @pdprintline = '0'; $pdprintline[$lcnt] = "$objcount 0 obj\n"; $lcnt++; $pdprintline[$lcnt] = "<<\n"; $lcnt++; $pdprintline[$lcnt] = "/Type /Page\n"; $lcnt++; $pdprintline[$lcnt] = "/Parent $pobjcnt 0 R\n"; $lcnt++; $pdprintline[$lcnt] = "/Resources << "; # Setup the font references for for the page $pdprintline[$lcnt] = $pdprintline[$lcnt] . "/Font <<"; foreach $item (sort keys(%fontnum)) { $pdprintline[$lcnt] = $pdprintline[$lcnt] . " /$item $fontnum{$item} 0 R"; } $pdprintline[$lcnt] = $pdprintline[$lcnt] . " >> "; $pdprintline[$lcnt] = $pdprintline[$lcnt] . "/Procset [/PDF]\n"; $lcnt++; $pdprintline[$lcnt] = ">>\n"; $lcnt++; my $ncnt = $objcount + 1; if ($psize eq 'LE' && $porin eq 'PO') { $pdprintline[$lcnt] = "/MediaBox [0 0 612 792]\n"; } if ($psize eq 'LE' && $porin eq 'LA') { $pdprintline[$lcnt] = "/MediaBox [0 0 792 612]\n"; } if ($psize eq 'A4' && $porin eq 'PO') { $pdprintline[$lcnt] = "/MediaBox [0 0 595 842]\n"; } if ($psize eq 'A4' && $porin eq 'LA') { $pdprintline[$lcnt] = "/MediaBox [0 0 842 595]\n"; } $lcnt++; $pdprintline[$lcnt] = "/Contents $ncnt 0 R\n"; $lcnt++; $pdprintline[$lcnt] = ">>\n"; $lcnt++; $pdprintline[$lcnt] = "endobj\n"; $lcnt++; # Write the Page object out foreach $item (@pdprintline) { $rc = print PDFFILE "$item"; $offset = $offset + length($item); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Page $objcount Object'); } } @pdprintline = ''; $lcnt = '0'; # So now it's time to write out the page data. # Lets get the page data for the current page. my $ncnt = $tmpcnt + 1; if ($pt1 eq 'XXXXXXXXXXNEW' && $pt4 eq $ncnt) { $objcount++; $pdlgth = 6; $pdoffs{$objcount} = $offset; $pdprintline[$lcnt] = "endobj\n"; $lcnt++; $pdprintline[$lcnt] = "endstream\n"; $lcnt++; $pdprintline[$lcnt] = "ET\n"; $lcnt++; while () { my $ldata = $_; chomp $ldata; ($pt1, $pt2, $pt3, $pt4) = split (/\s/, $ldata); $ncnt = $tmpcnt + 1; if ($pt1 eq 'XXXXXXXXXXNEW' && $pt4 ne $ncnt) { $pdprintline[$lcnt] = "BT\n"; $lcnt++; $pdprintline[$lcnt] = "stream\n"; $lcnt++; $pdprintline[$lcnt] = "<< /Length $pdlgth >>\n"; $lcnt++; $pdprintline[$lcnt] = "$objcount 0 obj\n"; $lcnt++; my $tmplgth = @pdprintline; $tmplgth--; my $tmplgt1 = 0; my @pdprintlin1; while ($tmplgth >= 0) { $pdprintlin1[$tmplgt1] = $pdprintline[$tmplgth]; $tmplgth--; $tmplgt1++; } foreach $item (@pdprintlin1) { $rc = print PDFFILE "$item"; $offset = $offset + length($item); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Page $objcount Data'); } } last; } $pdprintline[$lcnt] = $ldata . "\n"; $pdlgth = $pdlgth + length($pdprintline[$lcnt]); $lcnt++; } } $tmpcnt++; } # Well were getting there guess what comes now # Your right it's the font definitions. foreach $item (sort keys(%fontstr)) { @pdprintline = ''; $lcnt = '0'; $objcount++; $pdoffs{$objcount} = $offset; $pdprintline[$lcnt] = "$objcount 0 obj\n"; $lcnt++; $pdprintline[$lcnt] = "<<\n"; $lcnt++; $pdprintline[$lcnt] = "/Type /Font\n"; $lcnt++; $pdprintline[$lcnt] = "/Subtype /Type1\n"; $lcnt++; $pdprintline[$lcnt] = "/Name /$item\n"; $lcnt++; $pdprintline[$lcnt] = "/BaseFont /$fontstr{$item}\n"; $lcnt++; $pdprintline[$lcnt] = "/Encoding /MacRomanEncoding\n"; $lcnt++; $pdprintline[$lcnt] = ">>\n"; $lcnt++; $pdprintline[$lcnt] = "endobj\n"; $lcnt++; foreach $item (@pdprintline) { $rc = print PDFFILE "$item"; $offset = $offset + length($item); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Font $objcount Object'); } } } # Now lets do the cross reference and trailer data bits &xreftrl(); &trailer(); close(PDFFILE) || warn return ('0' , "File close failure - $filetyp - $!"); close(TMPFILE) || warn return ('0' , "File close failure - $temptyp - $!"); # $rc = unlink $temptyp; if (!$rc) { return ('0', "CANNOT DELETE - $temptyp"); } return ("1", "PDFREP Write PDF Data Succesful"); } #-----------------------------------------------------------------------------# # SUB XREFTRL # # # # This sub is the cross reference creation sub. It takes all the input and # # places the required cross reference into the PDF file no parameters are # # passed or required. # #-----------------------------------------------------------------------------# sub xreftrl { my @xrefdata = ''; my $item = ''; my $xcnt = '0'; my $tlcnt = 0; $objcount++; $startxref = $offset; $xrefdata[0] = "xref\n"; $tlcnt++; $xrefdata[1] = "0 $objcount\n"; $tlcnt++; $xrefdata[2] = "0000000000 65535 f\n"; $tlcnt++; foreach $item (sort keys(%pdoffs)) { my $tdata = $pdoffs{$item}; my $tlgth = length($tdata); while ($tlgth < 10) { $tdata = "0$tdata"; $tlgth++; } $xrefdata[$tlcnt] = "$tdata 00000 n\n"; $tlcnt++; } foreach $item (@xrefdata) { $rc = print PDFFILE "$item"; # Calculate the new offset afer calculating the amount of characters written. $offset = $offset + length($item); if (!$rc) { return ('0', 'PDFREP Write PDF File Failure - Cross Reference'); } } return ('1', "PDFREP Cross Reference Succesful"); } #-----------------------------------------------------------------------------# # SUB TRAILER # # # # This receives the appropriate information from the calling program and # # uses the already opened pdf file to add the trailer record of the file # # It is the final part of the PDF file and contains all the required number # # of objects etc. It returns the required messages and status before closing # # The PDF File # #-----------------------------------------------------------------------------# sub trailer { my ($callpgm, @rubbish) = @_; print PDFFILE "trailer\n"; print PDFFILE "<<\n"; print PDFFILE "/SIZE $objcount\n"; print PDFFILE "/Root 1 0 R\n"; print PDFFILE ">>\n"; print PDFFILE "startxref\n"; print PDFFILE "$startxref\n"; print PDFFILE "%%EOF"; return ('1', 'PDFREP Trailer Succesful'); } #-----------------------------------------------------------------------------# # SUB CRASHED # # # # This does not receive any parameters all it does is an unlink on the open # # files and then closes the said files which should release any links and # # physical disc space used. It is called from either the PDFREP package or # # can be called from the controlling program in case of ending required # #-----------------------------------------------------------------------------# sub crashed { close(PDFFILE) || warn return ('0' , "File close failure - $filetyp - $!"); close(TMPFILE) || warn return ('0' , "File close failure - $temptyp - $!"); # $rc = unlink $filetyp; if (!$rc) { return ('0', "CANNOT DELETE - $filetyp"); } $rc = unlink $temptyp; if (!$rc) { return ('0', "CANNOT DELETE - $temptyp"); } return ('1', "FILE DELETION AND CLOSE WORKED SUCCESFULLY"); } 1;