#!/usr/local/bin/perl
########################################################################
#  This program uses ghostscript and its pbm driver to extract absolute
#  bounding box information for any postscript file.
#
#  The output of the program is the bounding box for every page in the
#  document.
#
#  This program could certainly be made faster if written in a compiled
#  language, but the time spent within it is probably negligible compared
#  to the time spent by ghostscript.
#
#  Bugs:
#     The program may be confused by files that directly set the page
#     size.
#
#  Copyright (C) 1999  Dov Grobgeld <dov@imagic.weizmann.ac.il>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#  For more details see the file COPYING.
########################################################################
use strict;
use warnings;

# Overview
# --------
# 1. Parse the command line options.
# 2. Run ghostscript with the pbmraw device at a resolution chosen so
#    that the bitmap width is a whole number of bytes.
# 3. For every page (one raw PBM image on the pipe) find the number of
#    blank rows at the top and bottom and the number of blank pixel
#    columns at the left and right, and convert them to PostScript points.
# 4. Print the per-page boxes and the accumulated document box(es).
# 5. Optionally (-insert) rewrite the file with a BoundingBox header.

my $GSPATH = "gs";    # Your path to gs

# Maximum number of times ghostscript is restarted with an adjusted
# resolution before giving up (guards against an endless restart loop
# for documents that set their own page size).
my $MAX_RESTARTS = 10;

# Grow the box [llx, lly, urx, ury] referenced by $box so that it also
# covers the box given by the remaining four arguments.
sub extend_box {
    my ($box, $llx, $lly, $urx, $ury) = @_;
    $box->[0] = $llx if $llx < $box->[0];
    $box->[1] = $lly if $lly < $box->[1];
    $box->[2] = $urx if $urx > $box->[2];
    $box->[3] = $ury if $ury > $box->[3];
    return;
}

# Start the command given as a list (program, arguments...) without going
# through the shell and return a binary-mode handle reading its stdout.
# Dies if the program cannot be started.
sub start_gs {
    my @cmd = @_;
    open(my $pipe, '-|', @cmd)
        or die "Cannot run $cmd[0]: $!\n";
    binmode($pipe);    # the pbmraw stream is binary data
    return $pipe;
}

my $help = <<EOHELP;
$0 - Calculate the bounding boxes of all pages in a PostScript file

Syntax:

    $0 [-res r] [-letter|-a4] filename

Description:

    The program outputs the page number and the Bounding Box parameters
    for every page in the file filename . $0 accomplishes this by
    calling ghostscript, which must be in the path, at the resolution
    given by res, with output device pbm . The bounding box information
    is then extracted by scanning the pbm file.

Options:

    -res res   Choose a resolution for ghostscript other than the
               default 72 lpi.
    -letter
    -a4        Choose paper size. Default is a4.
    -evenodd   Report separate document bounding boxes for odd and
               even pages
    -pages n   Set max number of pages to analyze to n.
    -insert    Edit the postscript file and change/insert a EPSF
               directive and a BoundingBox statement.
    -pad p     Padd bounding box on all sides with p postscript points.

Requirements:

    Ghostscript in path with pbmraw support compiled in.

Author:

    Dov Grobgeld, Rehovot, Israel, 1997
EOHELP

# ----------------------------------------------------------------------
# Option parsing.  The patterns are deliberately left unanchored so that
# every spelling accepted before is still accepted.
# ----------------------------------------------------------------------
my ($baseres, $pagesize, $maxpages, $evenodd, $insert, $pad);

while (@ARGV && $ARGV[0] =~ /^-/) {
    my $opt = shift;
    if    ($opt =~ /-res/)     { $baseres  = shift; }
    elsif ($opt =~ /-letter/)  { $pagesize = 'letter'; }
    elsif ($opt =~ /-a4/)      { $pagesize = 'a4'; }
    elsif ($opt =~ /-pages/)   { $maxpages = shift; }
    elsif ($opt =~ /-evenodd/) { $evenodd  = 1; }
    elsif ($opt =~ /-insert/)  { $insert   = 1; }
    elsif ($opt =~ /-pad/)     { $pad      = shift; }
    elsif ($opt =~ /-help/)    { print $help; exit; }
    else                       { die "Unknown option $opt!\n"; }
}

# defaults
$pagesize = 'a4'      unless $pagesize;
$baseres  = 72        unless $baseres;
$maxpages = 1_000_000 unless $maxpages;

# table of known paper sizes (in postscript points).
my %paperwidth  = ('a4', 595, 'letter', 612);
my %paperheight = ('a4', 842, 'letter', 792);
my $pw = $paperwidth{$pagesize};
my $ph = $paperheight{$pagesize};

# Calculate a resolution of about $baseres that gives the pixelwidth
# divisible by 8, so that every bitmap row is a whole number of bytes.
my $w8 = int($pw / 72 * $baseres / 8 + 0.5) * 8;
$w8 = 8 if $w8 < 8;    # a zero width would give a zero resolution
my $res = 72.0 * $w8 / $pw;

# document bounding box initialization: [llx, lly, urx, ury], starting
# "inside out" so that the first real page box replaces every value.
my @docbox  = ($pw, $ph, 0, 0);
my @oddbox  = ($pw, $ph, 0, 0);
my @evenbox = ($pw, $ph, 0, 0);

# get filename
my $fn = shift;
die "Expected postscript file name!\n" unless defined $fn && length $fn;
-f $fn or die "No such file $fn!\n";

# Print header
print " Page: llx lly urx ury\n";

my @gsopt = ('-q', '-dNOPAUSE', "-sPAPERSIZE=$pagesize",
             '-sDEVICE=pbmraw', '-sOutputFile=-');
my $gs = start_gs($GSPATH, "-r$res", @gsopt, '--', $fn);

$| = 1;
my $page     = 0;
my $restarts = 0;

while (!eof($gs)) {
    # --- PBM header: magic number, optional comment lines, dimensions ---
    my $magic = <$gs>;
    chomp $magic;
    die "Expected P4 but got '$magic'. \nProbably a PostScript error...\n"
        unless $magic =~ /^P4/;

    my $dims;
    while (defined($dims = <$gs>)) {
        last unless $dims =~ /^#/;
    }
    die "Unexpected end of ghostscript output in bitmap header!\n"
        unless defined $dims;

    my ($w, $h) = split(" ", $dims);
    die "Malformed bitmap size line from ghostscript!\n"
        unless defined $w && defined $h
            && $w =~ /^\d+$/ && $h =~ /^\d+$/ && $w > 0;

    # The scanner needs rows that are exactly $w8/8 bytes wide.  If
    # ghostscript rounded differently, rescale the resolution and rerun.
    unless ($w8 == $w) {
        warn "Warning! Expected bitmap width $w8 but got $w. Adjusting resolution...\n";
        die "Giving up: ghostscript never produced a bitmap of width $w8!\n"
            if ++$restarts > $MAX_RESTARTS;
        $res *= $w8 / $w;
        close($gs);
        $gs = start_gs($GSPATH, "-r$res", @gsopt, '--', $fn);
        next;
    }

    $page++;
    printf " %4d ", $page;

    # --- Scan the bitmap, one row at a time ---
    my $topmarg   = 0;     # blank rows before the first row with ink
    my $botmarg   = 0;     # length of the current trailing run of blank rows
    my $leftmarg  = $w;    # smallest count of blank pixels on the left
    my $rightmarg = $w;    # smallest count of blank pixels on the right
    my $top       = 1;     # true while no row with ink has been seen
    my $rowbytes  = $w / 8;

    for my $rownum (1 .. $h) {
        my $row;
        my $got = read($gs, $row, $rowbytes);
        die "Unexpected end of bitmap data on page $page!\n"
            unless defined $got && $got == $rowbytes;

        # Count the bytes that are not NUL; none means an empty row.
        # (tr works on raw bytes, so a 0x0A data byte is not mistaken
        # for a line terminator.)
        my $inkbytes = ($row =~ tr/\0//c);

        if ($inkbytes == 0) {
            if ($top) { $topmarg++ }
            else      { $botmarg++ }
            next;
        }

        # A row with ink: the top margin is complete, and any blank run
        # seen so far lies inside the page, so it is not the bottom margin.
        $top     = 0;
        $botmarg = 0;

        # Get left margin of line: whole blank bytes, then the leading
        # zero bits of the first byte that contains ink.
        my ($lzeros, $lbyte) = $row =~ /\A(\0*)([^\0])/;
        my $lmcand = length($lzeros) * 8;
        if ($lmcand < $leftmarg) {
            my ($lead) = unpack('B8', $lbyte) =~ /^(0*)/;
            $lmcand += length($lead);
            $leftmarg = $lmcand if $lmcand < $leftmarg;
        }

        # Get right margin of line: whole blank bytes at the end, then
        # the trailing zero bits of the last byte that contains ink.
        # \z (not $) is required because the data may end in a 0x0A byte.
        my ($rbyte, $rzeros) = $row =~ /([^\0])(\0*)\z/;
        my $rmcand = length($rzeros) * 8;
        if ($rmcand < $rightmarg) {
            my ($trail) = unpack('B8', $rbyte) =~ /(0*)\z/;
            $rmcand += length($trail);
            $rightmarg = $rmcand if $rmcand < $rightmarg;
        }
    }

    # scale and translate to postscript points
    my $scale = 72 / $res;
    my $ury   = ($h - $topmarg) * $scale;
    my $lly   = $botmarg * $scale;
    my $llx   = $leftmarg * $scale;
    my $urx   = ($w - $rightmarg) * $scale;

    # A completely blank page: report an inside-out box so that it does
    # not enlarge the document box.
    $lly = $h * $scale if $lly == $ury && $lly == 0;

    printf "%5.f %5.f %5.f %5.f\n", $llx, $lly, $urx, $ury;

    # Padding only affects the accumulated boxes, not the per-page report.
    if ($pad) {
        $llx -= $pad;
        $lly -= $pad;
        $urx += $pad;
        $ury += $pad;
    }

    if ($evenodd) {
        extend_box($page % 2 ? \@oddbox : \@evenbox, $llx, $lly, $urx, $ury);
    }
    extend_box(\@docbox, $llx, $lly, $urx, $ury);

    if ($page >= $maxpages) {
        print "skipping rest of file...\n";
        last;
    }
}

# Finished with ghostscript.  Its exit status is deliberately not
# checked: after an early "last" it is terminated by the closed pipe.
close($gs);

print "Document: ";
printf "%5.f %5.f %5.f %5.f\n", @docbox;

if ($evenodd) {
    print "Odd: ";
    printf "%5.f %5.f %5.f %5.f\n", @oddbox;
    print "Even: ";
    printf "%5.f %5.f %5.f %5.f\n", @evenbox;
}

# -insert: keep the original as "<file>~" and write a new file that
# starts with an EPSF header and the document bounding box, followed by
# the original contents minus its old "%!" line and BoundingBox comments.
if ($insert) {
    my $backup = "$fn~";
    rename($fn, $backup)
        or die "Cannot rename $fn to $backup: $!\n";
    open(my $psin, '<', $backup)
        or die "Cannot read $backup: $!\n";
    open(my $psout, '>', $fn)
        or die "Cannot write $fn: $!\n";

    print $psout "%!PS-Adobe-1.0 EPSF-1.0\n";
    print $psout "%%BoundingBox: ";
    printf $psout "%5.f %5.f %5.f %5.f\n", @docbox;
    print $psout "%%Comment: Bounding box extracted by bboxx\n";
    print $psout "%%+: A program by Dov Grobgeld 1994\n";

    while (my $line = <$psin>) {
        next if $. == 1 && $line =~ /^%!/;
        next if $line =~ /^%%BoundingBox: /;
        print $psout $line;
    }

    close($psin);
    close($psout)
        or die "Error writing $fn: $!\n";
}