#!/usr/local/bin/perl
########################################################################
#  This program uses ghostscript and its pbm driver to extract absolute 
#  bounding box information for any postscript file.
#
#  The output of the program is the bounding box for every page in the
#  document.
#
#  This program could certainly be made faster if written in a compiled
#  language, but the time spent within it is probably negligible compared
#  to the time spent by ghostscript.
#
#  Bugs: 
#    The program may be confused by files that directly set the page
#    size.
#
#  Copyright (C) 1999 Dov Grobgeld <dov@imagic.weizmann.ac.il>
#  
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
# 
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
# 
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
# 
#  For more details see the file COPYING.
########################################################################

$GSPATH = "gs"; # Your path to gs

# Parse the leading command-line switches.  Every argument that starts
# with '-' is consumed here; the first argument that does not is left in
# @ARGV as the PostScript file name.
#
# The option patterns are intentionally unanchored (as they always have
# been) so that spellings such as "--help" keep working.  The order of
# the tests therefore matters and must not be changed.
#
# Options that take a value are validated right away: a missing or
# non-numeric value used to be accepted silently and only caused
# confusing failures (zero resolution, division by zero) much later.
while (defined($_ = $ARGV[0]) && /^-/) {
    shift;
    if (/-res/) {
        $baseres = shift;
        die "Option -res needs a positive resolution!\n"
            unless defined($baseres)
                && $baseres =~ /^(\d+\.?\d*|\.\d+)$/
                && $baseres > 0;
    }
    elsif (/-letter/) {
        $pagesize = 'letter';
    }
    elsif (/-a4/) {
        $pagesize = 'a4';
    }
    elsif (/-pages/) {
        $maxpages = shift;
        die "Option -pages needs a positive page count!\n"
            unless defined($maxpages)
                && $maxpages =~ /^\d+$/
                && $maxpages > 0;
    }
    elsif (/-evenodd/) {
        $evenodd++;
    }
    elsif (/-insert/) {
        $insert++;
    }
    elsif (/-pad/) {
        $pad = shift;
        die "Option -pad needs a number of postscript points!\n"
            unless defined($pad)
                && $pad =~ /^-?(\d+\.?\d*|\.\d+)$/;
    }
    elsif (/-help/) {
        print <<EOHELP;
$0 - Calculate the bounding boxes of all pages in a PostScript file

Syntax:
    $0 [-res r] [-letter|-a4] [-pages n] [-evenodd] [-insert] [-pad p] filename

Description:
    The program outputs the page number and the Bounding Box parameters
    for every page in the file filename .
       $0 accomplishes this by calling ghostscript, which must be in
    the path, at the resolution given by res, with output device pbm .
    The bounding box information is then extracted by scanning the pbm
    file.

Options:
    -res res  Choose a resolution for ghostscript other than the default
              72 dpi.
    -letter -a4  Choose paper size. Default is a4.
    -evenodd  Report separate document bounding boxes for odd and even pages
    -pages n  Set max number of pages to analyze to n.
    -insert   Edit the postscript file and change/insert an EPSF directive
              and a BoundingBox statement.
    -pad p    Pad the bounding box on all sides with p postscript points.

Requirements:
    Ghostscript in path with pbmraw support compiled in.

Author:
    Dov Grobgeld, Rehovot, Israel, 1997
EOHELP
        exit;
    }
    else {
        die "Unknown option $_!\n";
    }
}

# Anything not chosen on the command line falls back to a built-in default.
$pagesize ||= 'a4';
$baseres  ||= 72;
$maxpages ||= 1_000_000;

# Known paper sizes in postscript points, kept as two parallel tables.
# With perl5 references a single table would do:
#   %papersizes = ( 'a4' => [595,842], 'letter' => [612,792] );
#   ($pw,$ph) = @{$papersizes{$pagesize}};
%paperwidth  = ( 'a4' => 595, 'letter' => 612 );
%paperheight = ( 'a4' => 842, 'letter' => 792 );

($pw, $ph) = ($paperwidth{$pagesize}, $paperheight{$pagesize});

# Pick the resolution closest to $baseres at which the page is a whole
# number of bytes (a multiple of 8 pixels) wide.
$w8  = 8 * int($pw / 72 * $baseres / 8 + 0.5);
$res = 72.0 * $w8 / $pw;

# An all-white scanline, kept around for fast blank-row comparison.
$emptyrow = "\0" x ($w8/8);

# Every accumulated bounding box starts out "inverted" (lower left at the
# page's far corner, upper right at the origin) so that the first real
# page box replaces it completely.
($docllx,     $doclly,     $docurx,     $docury)     = ($pw, $ph, 0, 0);
($odddocllx,  $odddoclly,  $odddocurx,  $odddocury)  = ($pw, $ph, 0, 0);
($evendocllx, $evendoclly, $evendocurx, $evendocury) = ($pw, $ph, 0, 0);

# Get the name of the PostScript file to analyze: the first argument left
# over after option parsing.  Test for definedness rather than truth so
# that a file literally named "0" is accepted.
$fn = shift;
die "Expected postscript file name!\n" unless defined($fn) && length($fn);
die "No such file $fn!\n" unless -f $fn;

# Print header
print "    Page:   llx   lly   urx   ury\n";

# Ghostscript renders every page as a raw PBM bitmap on its stdout.
$gsopt = "-q -dNOPAUSE -sPAPERSIZE=$pagesize -sDEVICE=pbmraw -sOutputFile=-";

# Start ghostscript without going through a shell.  The fixed part of the
# command line is split on whitespace exactly as the shell used to do,
# but the file name is passed as a single argument of its own, so names
# containing spaces or shell metacharacters are neither broken up nor
# interpreted.
open(GS, '-|', split(' ', "$GSPATH -r$res $gsopt"), '--', $fn)
    || die "Cannot run $GSPATH: $!\n";
binmode(GS);    # the bitmap is binary data

# row_margins(ROW)
#
#   ROW is one raw PBM (P4) scanline: packed bits, most significant bit
#   first, a set bit meaning an inked pixel.
#
#   Returns the empty list when no bit is set.  Otherwise returns
#   (LEFT, RIGHT): the number of blank pixels before the first inked
#   pixel and after the last inked pixel of the row.
#
#   The patterns are anchored with \z, not $.  '$' also matches just
#   before a trailing "\n", so a row whose last byte happened to be 0x0A
#   (bits 00001010) used to be mistaken for a blank row or had the ink
#   in that byte ignored when computing the right margin.
sub row_margins {
    my($row) = @_;
    my($left, $right);

    # First non-NUL byte from the left; if there is none the row is blank.
    return () unless $row =~ /^(\0*)([^\0])/;
    $left = 8 * length($1);
    unpack('B8', $2) =~ /^(0*)/;        # leading blank bits of that byte
    $left += length($1);

    # Last non-NUL byte, followed only by NUL bytes up to the very end.
    $row =~ /([^\0])(\0*)\z/;
    $right = 8 * length($2);
    unpack('B8', $1) =~ /(0*)\z/;       # trailing blank bits of that byte
    $right += length($1);

    return ($left, $right);
}

# Main loop: read one PBM image per page from ghostscript, measure the
# blank margins on all four sides, convert them to postscript points and
# fold the page box into the document (and odd/even) boxes.
$|++;
$page=0;
$retries=0;
while(!eof(GS)) {
    # PBM header: magic number, optional comment lines, "width height".
    chop($_=<GS>);
    die "Expected P4 but got '$_'. \nProbably a PostScript error...\n" unless /P4/;
    while(<GS>) { last unless /^#/; }
    ($w,$h)=split(" ", $_);
    die "Malformed bitmap header from ghostscript!\n" unless $w > 0 && $h > 0;

    # The row scanner relies on the width being the expected multiple of
    # 8.  If ghostscript chose another width, rescale the resolution and
    # render the file again from the first page.  Give up after a few
    # attempts instead of respawning ghostscript forever.
    unless ($w8 == $w) {
        die "Cannot get bitmap width $w8 from ghostscript (got $w). Giving up!\n"
            if ++$retries > 5;
        warn "Warning! Expected bitmap width $w8 but got $w. Adjusting resolution...\n";
        $res *= $w8 / $w;
        close(GS);
        # No shell involved: the file name is passed as one argument.
        open(GS, '-|', split(' ', "$GSPATH -r$res $gsopt"), '--', $fn)
            || die "Cannot run $GSPATH: $!\n";
        binmode(GS);
        $page = 0;      # rendering restarts with the first page
        next;
    }

    $page++;
    printf "    %4d  ", $page;
    $topmarg = 0;
    $botmarg = 0;
    $leftmarg = $w;
    $rightmarg = $w;
    $top=1;             # still inside the top margin?
    $bot=0;             # inside a run of blank rows below some ink?
    $rowbytes = $w/8;
    for $i (1..$h) {
        read(GS, $_, $rowbytes) == $rowbytes
            || die "Unexpected end of ghostscript output on page $page!\n";

        ($lmcand, $rmcand) = row_margins($_);

        if (!defined($lmcand)) {
            # Blank row: it belongs either to the top margin or to a
            # candidate bottom margin.
            if ($top) {
                $topmarg++;
            }
            else {
                $botmarg = 0 unless $bot; # a new run of blank rows starts
                $bot=1;
                $botmarg++;
            }
        }
        else {
            # Inked row: any blank run seen so far was not the bottom
            # margin after all.
            $bot=0; $top=0;
            $leftmarg  = $lmcand if $lmcand < $leftmarg;
            $rightmarg = $rmcand if $rmcand < $rightmarg;
        }
    }

    # scale and translate to postscript points
    $scale = 72/$res;
    $ury= ($h-$topmarg)*$scale;
    $lly= $botmarg*$scale;
    $llx= $leftmarg*$scale;
    $urx= ($w-$rightmarg)*$scale;
    # A completely blank page gets an inverted box so that it never
    # enlarges the document box.
    $lly= $h*$scale if $lly==$ury && $lly==0;
    printf "%5.f %5.f %5.f %5.f\n", $llx, $lly, $urx, $ury;

    # Padding is applied after the page line has been printed, so it only
    # shows up in the accumulated document boxes.
    if ($pad) {
        $llx-=$pad;
        $lly-=$pad;
        $urx+=$pad;
        $ury+=$pad;
    }

    if ($evenodd) {
        if ($page % 2) {  # odd
            $odddocllx=$llx if $llx<$odddocllx;
            $odddoclly=$lly if $lly<$odddoclly;
            $odddocurx=$urx if $urx>$odddocurx;
            $odddocury=$ury if $ury>$odddocury;
        }
        else {
            $evendocllx=$llx if $llx<$evendocllx;
            $evendoclly=$lly if $lly<$evendoclly;
            $evendocurx=$urx if $urx>$evendocurx;
            $evendocury=$ury if $ury>$evendocury;
        }
    }

    $docllx=$llx if $llx<$docllx;
    $doclly=$lly if $lly<$doclly;
    $docurx=$urx if $urx>$docurx;
    $docury=$ury if $ury>$docury;

    if ($page >= $maxpages) {
        print "skipping rest of file...\n";
        last;
    }
}
# Report the accumulated bounding boxes: always the whole-document box,
# and with -evenodd also the separate boxes of the odd and even pages.
# @summary is a flat list of (label, llx, lly, urx, ury) records.
@summary = ("Document: ", $docllx, $doclly, $docurx, $docury);
if ($evenodd) {
    push(@summary,
         "Odd:      ", $odddocllx, $odddoclly, $odddocurx, $odddocury,
         "Even:     ", $evendocllx, $evendoclly, $evendocurx, $evendocury);
}
while (@summary) {
    ($label, @box) = splice(@summary, 0, 5);
    print $label;
    printf "%5.f %5.f %5.f %5.f\n", @box;
}

# With -insert, rewrite the PostScript file in place: the original is
# kept as "$fn~" and a new "$fn" is written that starts with an EPSF
# header and the computed document bounding box, followed by the old
# contents minus the old "%!" first line and any old BoundingBox
# comments.
#
# Every step is checked.  Previously a failed rename() went unnoticed and
# the following open for writing truncated the only copy of the file.
if ($insert) {
    rename($fn, "$fn~") || die "Cannot rename $fn to $fn~: $!\n";
    open(PSIN, '<', "$fn~") || die "Cannot read $fn~: $!\n";
    unless (open(PSOUT, '>', $fn)) {
        $err = $!;
        close(PSIN);
        rename("$fn~", $fn);    # best effort: put the original back
        die "Cannot write $fn: $err\n";
    }
    binmode(PSIN);              # the file may contain binary sections
    binmode(PSOUT);

    print PSOUT "%!PS-Adobe-1.0 EPSF-1.0\n";
    print PSOUT "%%BoundingBox: ";
    printf PSOUT "%5.f %5.f %5.f %5.f\n", $docllx, $doclly, $docurx, $docury;
    print PSOUT "%%Comment: Bounding box extracted by bboxx\n";
    print PSOUT "%%+:       A program by Dov Grobgeld 1994\n";

    while(<PSIN>) {
        next if $.==1 && /^%!/;         # replaced by the header above
        next if /^%%BoundingBox: /;     # superseded by the new box
        print PSOUT;
    }

    close(PSIN);
    # Buffered write errors (disk full, ...) only surface at close time.
    close(PSOUT) || die "Error writing $fn: $!\n";
}