#! /bin/sh eval '(exit $?0)' && eval 'PERL_BADLANG=x;export PERL_BADLANG;: \ ;exec perl -x -S -- "$0" ${1+"$@"};#'if 0; eval 'setenv PERL_BADLANG x;exec perl -x -S -- "$0" $argv:q;#'.q #!perl -w +($0=~/(.*)/s);do$1;die$@if$@;__END__+if 0; # Don't touch/remove lines 1--7: http://www.inf.bme.hu/~pts/Magic.Perl.Header use strict; # Change by Thomas Esser, Sept. 1998: The above lines allows us to find # perl along $PATH rather than guessing a fixed location. The above # construction should work with most shells. # Modified by pts@fazekas.hu at and before Sun Feb 2 12:57:10 CET 2003 # A script to transform an EPS file so that: # a) it is guarenteed to start at the 0,0 coordinate # b) it sets a page size exactly corresponding to the BoundingBox # This means that when Ghostscript renders it, the result needs no # cropping, and the PDF MediaBox is correct. # c) the result is piped to Ghostscript and a PDF version written # # -- It needs a Level 2 PS interpreter. # -- If the bounding box is not right, of course, you have problems... # -- BoundingBox: (atend) is perfectly supported. # -- recommended options: --below --hires # # Sebastian Rahtz, for Elsevier Science # # now with extra tricks from Hans Hagen's texutil. # # History # 1999/05/06 v2.5 (Heiko Oberdiek) # * New options: --hires, --exact, --filter, --help. # * Many cosmetics: title, usage, ... # * New code for debug, warning, error # * Detecting of cygwin perl # * Scanning for %%{Hires,Exact,}BoundingBox. # * Scanning only the header in order not to get a wrong # BoundingBox of an included file. # * (atend) supported. # * uses strict; (earlier error detecting). # * changed first comment from '%!PS' to '%!'; # * corrected (atend) pattern: '\s*\(atend\)' # * using of $bbxpat in all BoundingBox cases, # correct the first white space to '...Box:\s*$bb...' # * corrected first line (one line instead of two before 'if 0;'; # 2000/11/05 v2.6 (Heiko Oberdiek) # * %%HiresBoundingBox corrected to %%HiResBoundingBox # 2001/03/05 v2.7 (Heiko Oberdiek) # * Newline before grestore for the case that there is no # whitespace at the end of the eps file. # 2003/02/02 (Szabó Péter) # * option --below # * removes DOS EPSF binary junk correctly # * adds all 3 BoundingBox DSC comments # * reads all 3 BoundingBox DSC comments, and picks the best # * forces BoundingBox to be an integer # * adds %%EndComments and proper %!PS-Adobe-?-? EPSF-?.? header # * adds %%Pages: # # Dat: calling ``showpage'' is not required, gs 6.50 works without it # Imp: detect error messages from GS, abort... # Imp: make it work for PDF input # Imp: make it work for PS non-EPS files ### program identification my $program = "epstopdf"; my $filedate="2003/02/02"; # my $filedate="2001/03/05"; my $fileversion="2.7p"; my $copyright = "Copyright 1998-2001 by Sebastian Rahtz et al.\nContains modifications by pts\@fazekas.hu"; my $title = "\U$program\E $fileversion, $filedate - $copyright\n"; ### ghostscript command name my($quote,$GS)=("'","gs"); ($quote,$GS) = ("\"","gswin32c") if $^O eq 'MSWin32' or $^O =~ /cygwin/i; ### options $::opt_help=0; $::opt_debug=0; $::opt_compress=1; $::opt_gs=1; $::opt_hires=0; $::opt_exact=0; $::opt_filter=0; $::opt_outfile=""; $::opt_below=0; ### usage my @bool = ("false", "true"); my $usage = <<"END_OF_USAGE"; ${title}Syntax: $program [options] Options: --help: print usage --outfile=: write result to --(no)filter: read standard input (default: $bool[$::opt_filter]) --(no)gs: run Ghostscript to create PDF (default: $bool[$::opt_gs]) --(no)compress: use compression (default: $bool[$::opt_compress]) --(no)hires: scan HiResBoundingBox (default: $bool[$::opt_hires]) --(no)exact: scan ExactBoundingBox (default: $bool[$::opt_exact]) --(no)debug: debug informations (default: $bool[$::opt_debug]) --(no)below: allow below baseline (default: $bool[$::opt_below]) Examples for producing 'test.pdf': * $program test.eps * produce postscript | $program --filter >test.pdf * produce postscript | $program -f -d -o=test.pdf Example: look for HiResBoundingBox and produce corrected PostScript: * $program -d --nogs -hires test.ps>testcorr.ps END_OF_USAGE ### process options use Getopt::Long; GetOptions ( "help!", "debug!", "filter!", "compress!", "gs!", "hires!", "below!", "exact!", "outfile=s", ) or die $usage; ### help functions sub debug { print STDERR "* @_\n" if $::opt_debug; } sub warning { print STDERR "==> Warning: @_!\n"; } sub error { die "$title!!! Error: @_!\n"; } sub errorUsage { die "$usage\n!!! Error: @_!\n"; } ### option help die $usage if $::opt_help; ### get input filename my $InputFilename = ""; if ($::opt_filter) { @ARGV == 0 or errorUsage "Input file cannot be used with filter option"; $InputFilename = "-"; debug "Input file: standard input"; } else { @ARGV > 0 or errorUsage "Input filename missing"; @ARGV < 2 or errorUsage "Unknown option or too many input files"; $InputFilename = $ARGV[0]; if ($ARGV[0] eq '-') { $::opt_filter=1; debug "Input file: standard input"; } else { -f $InputFilename or error "'$InputFilename' does not exist"; debug "Input filename:", $InputFilename; } } ### option compress my $GSOPTS; $GSOPTS = "-dUseFlateCompression=".($::opt_compress?"true":"false")." "; # $GSOPTS.=" -r72 -g99999x99999 "; $GSOPTS.=" -r72 -sPAPERSIZE=a4 "; # default $GSOPTS.=" -dCompatibilityLevel=1.2"; ### option BoundingBox types #**** pts **** # scan all of them and find the best { my $BBprint = "%%BoundingBox:"; $BBprint = "%%HiResBoundingBox:" if $::opt_hires; $BBprint = "%%ExactBoundingBox:" if $::opt_exact; debug "strongest BoundingBox comment:", $BBprint; } my $BBregex='%%(Hi[Rr]es|Exact|)BoundingBox:'; ### option outfile my $OutputFilename = $::opt_outfile; if ($OutputFilename eq "") { if ($::opt_gs) { $OutputFilename = $InputFilename; if (!$::opt_filter) { $OutputFilename =~ s/\.[^\.]*$//; $OutputFilename .= ".pdf"; } } else { $OutputFilename = "-"; # standard output } } if ($::opt_filter) { debug "Output file: standard output"; } else { debug "Output filename:", $OutputFilename; } ### option gs if ($::opt_gs) { debug "Ghostscript command:", $GS; debug "Compression:", ($::opt_compress) ? "on" : "off"; } ### open input file if ($::opt_filter) { open(IN, "<&STDIN") or error "Cannot open standard input"; } else { open(IN,"< $InputFilename") or error "Cannot open '$InputFilename'"; } binmode IN; #**** pts **** sub read_error() { error "read $InputFilename: $!" } my $bytes_left=-1; #** @param $_[0] number of bytes to read, or undef to read a line #** @return the string read sub readIN(;$) { my $S; return "" if $bytes_left==0; ## print STDERR "READ\n"; if (defined $_[0]) { read_error if 0>read IN, $S, $_[0] } else { $S=; read_error if !defined($S) and $!; $S="" if !defined $S; } if ($bytes_left<0) { # unlimited } elsif (length($S)>=$bytes_left) { $S=substr($S, 0, $bytes_left); $bytes_left=0; } else { $bytes_left-=length($S) } $S } ### scan first line, check for DOS eps (and remove DOS headers) #**** pts **** my $header=""; $_=""; { my $S; read_error if 0>read IN, $S, 1; error "$InputFilename: empty file" if 0==length($S); if ($S eq "\305") { # DOS EPSF header read_error if 29>read IN, $S, 29, 1; my ($eheader,$ps_ofs,$ps_len,$wmf_ofs,$wmf_len,$tif_ofs,$tif_len,$checksum)= unpack"A4VVVVVVv", $S; error "$InputFilename: bad DOS EPS" if $eheader ne "\305\320\323\306" or $ps_ofs<30; my($ps_end, $wmf_end, $tif_end)=($ps_ofs+$ps_len, $wmf_ofs+$wmf_len, $tif_ofs+$tif_len); $ps_ofs-=30; if (!seek IN, $ps_ofs, 1) { while ($ps_ofs>4096) { $ps_ofs-=4096; readIN 4096 } read_in $ps_ofs if $ps_ofs>0; } $bytes_left=($ps_end>$wmf_end and $ps_end>$tif_end) ? -1 : $ps_len; $S=readIN(1); } if ($S eq '%') { $S.=readIN; error "$InputFilename: won't read a PDF file" if substr($S,0,4)eq'%PDF'; error "$InputFilename: EPS DSC must be %!PS-Adobe" if substr($S,0,4)ne'%!PS'; # ^^^ Dat: mpost outputs "%!PS\n" } else { warning "$InputFilename: no PS ADSC header, BoundingBox not found\n" } $header=$S; } ### open output file if ($::opt_gs) { my $fn=$OutputFilename; $fn=~s@'@\\'@g if $quote eq "'"; my $pipe = "$GS -q -dBATCH -sDEVICE=pdfwrite $GSOPTS " . "-sOutputFile=$quote$OutputFilename$quote -"; debug "Ghostscript pipe:", $pipe; open(OUT,"|$pipe") or error "Cannot open Ghostscript for piped input"; } else { open(OUT,"> $OutputFilename") or error "Cannot write '$OutputFilename"; } die unless binmode OUT; ### variables and pattern for BoundingBox search my $bbxpatt = '[0-9eE\.\-]'; # protect backslashes: "\\" gets '\' my $BBValues = "\\s*($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)"; #** @return arg rounded down to int sub floor($) { # Dat: Perl int() rounds towards zero $_[0] < 0 ? -int(-$_[0]) : int($_[0]) } #** @return arg rounded up to int sub ceil($) { $_[0] > 0 ? -int(-$_[0]) : int($_[0]) } my ($xoffset, $yoffset) = (0, 0); my $do_translate=0; #** @return PostScript code to be printed after the header sub CorrectBoundingBox($$$$) { my ($llx, $lly, $urx, $ury) = @_; debug "Old BoundingBox:", $llx, $lly, $urx, $ury; my ($width, $height) = ($urx - $llx, $ury - $lly); ($xoffset, $yoffset) = (-$llx, -$lly); # Dat: it is inherently impossible to tell GS that it shouldn't # recompress the images already compressed in the EPS file, but keep # them in their original, compressed form. So we rather instruct GS to # recompress my $pagedevice2set=" /AutoRotatePages /None /CompatibilityLevel 1.2 /UseFlateCompression true /AutoPositionEPSFiles false /AutoFilterGrayImages false /ConvertImagesToIndexed false /DownsampleMonoImages false /DownsampleGrayImages false /DownsampleColorImages false /AutoFilterColorImages false /EncodeMonoImages true /EncodeGrayImages true /EncodeColorImages true /AntiAliasMonoImages false /AntiAliasGrayImages false /AntiAliasColorImages false\n"; $::opt_below=0 if $lly>=0; ($llx,$lly,$urx,$ury)=(0,0,$urx-$llx,$ury-$lly) if !$::opt_below; my $bbx="%%BoundingBox: ".floor($llx)." ".floor($lly)." ". ceil ($urx)." ".ceil ($ury)."\n". "%%HiResBoundingBox: $llx $lly $urx $ury\n". "%%ExactBoundingBox: $llx $lly $urx $ury\n%%EndComments\n"; if ($::opt_below) { #**** pts **** # vvv we output a second /MediaBox here, and we'll remove the first one # (written by GS) later return "$bbx<< /PageSize [$width $height] $pagedevice2set >> setpagedevice mark /MediaBox [$llx $lly $urx $ury] /PAGE pdfmark mark /CropBox [$llx $lly $urx $ury] /PAGE pdfmark\n"; } debug "New BoundingBox: 0 0", $width, $height; debug "Offset:", $xoffset, $yoffset; if ($xoffset==0 and $yoffset==0) { return "$bbx<< /PageSize [$width $height] $pagedevice2set >> setpagedevice mark /CropBox [0 0 $width $height] /PAGE pdfmark\n" } $xoffset=0 if $xoffset==0; # get rid of `-0' $yoffset=0 if $yoffset==0; # get rid of `-0' $do_translate=1; "$bbx<< /PageSize [$width $height] $pagedevice2set >> setpagedevice mark /CropBox [0 0 $width $height] /PAGE pdfmark gsave $xoffset $yoffset translate\n" } ### scan header if (14096) { die unless seek(IN, $bytes_left-4096, 1); $bytes_left=4096; } elsif ($bytes_left<0) { die unless seek(IN, -4096, 2); } $doing_atend=1; goto read_again; } print OUT "%%Pages: 1\n" if !$had_pages; if ($bbtype ne'-') { print OUT CorrectBoundingBox $llx, $lly, $urx, $ury } else { warning "BoundingBox not found"; print OUT "%%EndComments\n" } print OUT $BBCorrected; } ### print rest of file print OUT while length($_=readIN 4096); ### close files error "running gs" unless close(IN); # ^^^ SUXX: gs always exit(0), if exists print OUT "\ngrestore\n" if $do_translate; close(OUT); if ($::opt_below and $::opt_gs) { ### ****pts**** remove incorrect /MediaBox produced by gs die unless open F, "> $OutputFilename.tgs"; # Imp: in temp dir die unless print F ' % this is temporary gs command file created by epstopdf GS_PDF_ProcSet begin pdfdict begin FN (r) file pdfopen begin % vvv keep file offsets, because `pdffindpageref` overrides it with contents /OFT Objects 0 get dup length array copy def % vvv Dat: the generation number is assumed to be 0 % vvv Dat: modifies Objects[0] 1 pdffindpageref 0 get Objects 0 OFT put readxrefentry === currentdict pdfclose end end end '; die unless close F; my $offset=`$GS -dNODISPLAY -dSAFER -dBATCH -sFN=$quote$OutputFilename$quote -q $quote$OutputFilename.tgs$quote`; chomp $offset; unlink "$OutputFilename.tgs"; die unless $offset=~/\A\d+\Z(?!\n)/; # Dat: now $offsets is a file position in $OutputFilename containing our /Page object die unless open F, "+< $OutputFilename"; die unless binmode F; die unless seek F, $offset+=0, 0; my $pageobj; die unless 32