#!/usr/bin/perl # Vpl2vpl: a program to generate accented virtual fonts for TeX # Copyright (C) 1997 John D. Smith # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. $version = 0.25; #------------------------------------------------------------------------# $description = "Syntax: vpl2vpl -d definition-file [-s shrink-factor] [-c candrabindu-adjustment] [-b] vpl-file Vpl2vpl creates new TeX virtual fonts based on existing fonts or virtual fonts (\"input fonts\"). A successful run will read a pl (Property List) or vpl (Virtual Property List) file and a definition file, and will generate a new vpl (Virtual Property List) file on standard output. The input font is assumed to adhere to the standard TeX encoding for text fonts unless it was created with either of the programs afm2pl or afm2tfm, in which case it is assumed to conform to (respectively) the Adobe Standard Encoding or the encoding specified in the file dvips.enc. In either case, the name of the input font is assumed to be the name of the input file without its .vpl or .pl extension: it must conform to normal TeX conventions for naming fonts, as vpl2vpl attempts to draw conclusions from it about the kind of font it is dealing with. A typical complete sequence of commands to create a new virtual font might therefore be tftopl cmr10.tfm cmr10.pl vpl2vpl -d ISO-Latin1.def cmr10.pl >cmr10_isol1.vpl vptovf cmr10_isol1.vpl cmr10_isol1.vf cmr10_isol1.tfm for a Computer Modern font, or afm2pl Times-Roman.afm rptmr.pl pltotf rptmr.pl rptmr.tfm vpl2vpl -d ISO-Latin1.def rptmr.pl >ptmr-isol1.vpl vptovf ptmr-isol1.vpl ptmr-isol1.vf ptmr-isol1.tfm for a PostScript font. Another approach for a PostScript font is to use afm2tfm: afm2tfm Times-Roman.afm -t dvips.enc -v ptmr rptmr vpl2vpl -d ISO-Latin1.def ptmr.vpl >ptmr-isol1.vpl vptovf ptmr-isol1.vpl ptmr-isol1.vf ptmr-isol1.tfm -- but this is now deprecated, as afm2tfm generates incorrect values for the heights of some characters, and this can lead to bad accent placing. In order to keep the whole upper half of the character set free for the requirements of the encoding specified in the definition file, certain modifications are made to input fonts following the dvips.enc encoding to bring them into greater conformity with the TeX norm. In particular, the characters dotaccent and hungarumlaut are placed in the positions assigned by TeX (\"5F, \"7D), not those enforced by dvips.enc (\"C7, \"CD). The f-ligatures, double quotes and dashes are also moved from the upper half of the character set to their normal TeX positions. As a result, the following characters are not found in the lower half of the character set: quotesingle, quotedbl, backslash, underscore, braceleft, bar, braceright. These characters can, however, be assigned positions in the output font if they are needed. (Indeed, they could all be explicitly restored to their dvips.enc positions if this were desired.) Options: -d should refer to a font definition file. This file (which could usefully be named, e.g., \"French.def\") should consist of lines of character definitions, in the form \"number\" \"character\" or \"number\" \"character\" \"accent\" Here \"number\" represents the character's position in the new encoding and may be expressed in decimal, octal or hex; \"character\" names the character (e.g. \"comma\", \"eight\", \"A\") or consists of the word \".notdef\" (indicating that the specified number's \"slot\" in the new encoding is to be empty); and \"accent\" optionally names an accent to be placed on the character. In addition to the standard accents available in PostScript fonts, \"underbar\" and \"underdot\" are also available, as are \"under\" versions of all the normal superscript accents (\"underdieresis\", \"underring\", etc.). The Indian accent \"candrabindu\" may also be specified: it is formed by overprinting a breve with a dotaccent. Finally, \"overdot\" may be used as a synonym for \"dotaccent\". If the character named in the \"accent\" position is not in fact a valid accent character, the program interprets the definition as a request for a digraph formed from the \"character\" and the \"accent\". A digraph consisting of, say, \"k\" and \"h\" will be indistinguishable from the letters \"k\" and \"h\" printed consecutively, but the digraph \"kh\" can itself receive accents like any other character: see next paragraph. A new character (such as \"amacron\" or \"kh\") may be freely used in the \"character\" position of a further definition (such as \"amacron breve\" or \"kh underbar\"). There is no constraint on the ordering of definitions within a definition file. The definition of \"a macron\" does not have to precede that of \"amacron breve\": requests for \"impossible\" characters are deferred until their constituents have had a chance to come into being. \"Slots\" for which no new definition is given retain the definition they have in the input font. The definition file may also contain blank lines and comments (introduced by \"\#\"). -s may optionally give the factor, expressed as a per-thousand value, by which normally superscript accents (such as dieresis, ring) should be shrunk when they are used as subscript accents (such as underdieresis, underring). Values of around 800 may be found useful. -c may optionally give two comma-separated numerical values to adjust the x and y coordinates of the dotaccent placed within a breve to form the candrabindu accent. A coordinate scheme using \"DESIGNUNITS R 1000\" is assumed. -b may optionally be specified to block the use of predefined accented characters, forcing vpl2vpl to define its own versions. This may be useful to secure a consistent appearance in cases where a font designer does not share vpl2vpl's views on where accents should be placed. -h prints this help. "; #------------------------------------------------------------------------# ######################## # Packages and constants ######################## # use File::Basename; use Getopt::Std; $cmdline = basename($0) . " " . join " ", @ARGV; getopts('d:s:c:bh'); if ($opt_h or !$opt_d or $#ARGV != 0) { print STDERR $description; exit 1; } $filename = $ARGV[0]; ($fontname = $filename) =~ s/\..*$//; ($encname = basename($opt_d)) =~ s/\..*$//; $vtitle = "(VTITLE Font $fontname modified for $encname encoding by vpl2vpl"; $vtitle .= " v. $version" if $version; $vtitle .= ")\n(COMMENT Command line: $cmdline)"; # # Flags for bold and small caps. These are probably a bit iffy, but # there's not much that can be done about it. # if ($fontname =~ /(^p.*b[oi]?[c]?$|^[^p].*bx[a-z]*[0-9]+$)/) { $bold = 1 } if ($fontname =~ /(^p.*c$|^[^p].*csc[a-z]*[0-9]+$)/) { $scaps = 1 } if ($opt_s) { $shrink = $opt_s / 1000 } else { $shrink = 1 } # # Array to convert from number to vpl representation # foreach $i (0 .. 255) { $nv[$i] = (chr($i) =~ /[0-9A-Za-z]/ ? "C " . chr $i : sprintf("O %lo", $i)); } # # Now the encoding vectors. # @TeXenc = ( "Gamma", "Delta", "Theta", "Lambda", "Xi", "Pi", "Sigma", "Upsilon", "Phi", "Psi", "Omega", "ff", "fi", "fl", "ffi", "ffl", "dotlessi", "dotlessj", "grave", "acute", "caron", "breve", "macron", "ring", "cedilla", "germandbls", "ae", "oe", "oslash", "AE", "OE", "Oslash", "space", "exclam", "quotedblright", "numbersign", "dollar", "percent", "ampersand", "quoteright", "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "exclamdown", "equal", "questiondown", "question", "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", "quotedblleft", "bracketright", "circumflex", "dotaccent", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "endash", "emdash", "hungarumlaut", "tilde", "dieresis" ); @dvipsenc = ( ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "quotesingle", "exclamdown", "questiondown", "dotlessi", "dotlessj", "grave", "acute", "caron", "breve", "macron", "ring", "cedilla", "germandbls", "ae", "oe", "oslash", "AE", "OE", "Oslash", "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quoteright", "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "circumflex", "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar", "braceright", "tilde", "dieresis", "asciicircum", "asciitilde", "Ccedilla", "Iacute", "Icircumflex", "atilde", "edieresis", "egrave", "scaron", "zcaron", "Eth", "ff", "ffi", "ffl", ".notdef", ".notdef", ".notdef", ".notdef", "Scaron", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "Ydieresis", ".notdef", "Zcaron", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "cent", "sterling", "fraction", "yen", "florin", "section", "currency", "copyright", "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl", "degree", "endash", "dagger", "daggerdbl", "periodcentered", ".notdef", "paragraph", "bullet", "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand", ".notdef", ".notdef", "Agrave", "Aacute", "Acircumflex", "Atilde", "Adieresis", "Aring", ".notdef", "dotaccent", "Egrave", "Eacute", "Ecircumflex", "Edieresis", "Igrave", "hungarumlaut", "ogonek", "Idieresis", "emdash", "Ntilde", "Ograve", "Oacute", "Ocircumflex", "Otilde", "Odieresis", ".notdef", ".notdef", "Ugrave", "Uacute", "Ucircumflex", "Udieresis", "Yacute", "Thorn", ".notdef", "agrave", "aacute", "acircumflex", "ordfeminine", "adieresis", "aring", ".notdef", "ccedilla", "Lslash", "eacute", "ecircumflex", "ordmasculine", "igrave", "iacute", "icircumflex", "idieresis", ".notdef", "ntilde", "ograve", "oacute", "ocircumflex", "otilde", "odieresis", ".notdef", "lslash", "ugrave", "uacute", "ucircumflex", "udieresis", "yacute", "thorn", "ydieresis" ); @adobeenc=( ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quoteright", "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "exclamdown", "cent", "sterling", "fraction", "yen", "florin", "section", "currency", "quotesingle", "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl", ".notdef", "endash", "dagger", "daggerdbl", "periodcentered", ".notdef", "paragraph", "bullet", "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand", ".notdef", "questiondown", ".notdef", "grave", "acute", "circumflex", "tilde", "macron", "breve", "dotaccent", "dieresis", ".notdef", "ring", "cedilla", ".notdef", "hungarumlaut", "ogonek", "caron", "emdash", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "AE", ".notdef", "ordfeminine", ".notdef", ".notdef", ".notdef", ".notdef", "Lslash", "Oslash", "OE", "ordmasculine", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "ae", ".notdef", ".notdef", ".notdef", "dotlessi", ".notdef", ".notdef", "lslash", "oslash", "oe", "germandbls", ".notdef", ".notdef", ".notdef", ".notdef" ); ############### # Read DEF file ############### # open DEF, $opt_d or die "Cannot open $opt_d: $!\n"; while () { next if (/^\s*$/ || /^\#/); s/\s*(\#.*)?$//; push @deflines, $_; } close DEF; ############### # Read VPL file ############### # # File header # $vplhead = <> or exit 1; unless ($vplhead =~ /^\((VTITLE|FAMILY) /) { die "$filename is not a vpl file: giving up\n" } do { $_ = <>; $vplhead .= $_; } until ($_ =~ /^\(LIGTABLE$/ or eof); if (eof) { die "$filename does not seem to be a text font (no LIGTABLE): giving up\n"; } if ($vplhead =~ s/\A\(VTITLE(.*)$/$vtitle\n(COMMENT Old vtitle:$1/m) { $vplhead =~ s/\n\(COMMENT Please edit that VTITLE .*\)$//m; @enc = @dvipsenc; $dvips = 1; } elsif ($vplhead =~ /\A\(FAMILY.*\)\n\(CODINGSCHEME ADOBESTANDARDENCODING\)$/m) { $vplhead =~ s/\A/$vtitle\n/m; @enc = @adobeenc; $dvips = 0; } else { $vplhead =~ s/\A/$vtitle\n/m; @enc = @TeXenc; $dvips = 0; } if ($vplhead =~ /^\(CODINGSCHEME TEX MATH SYMBOLS/m) { die "$filename is a TeX math font: giving up\n"; } unless ($vplhead =~ s/^(\(CODINGSCHEME .*\+\s?)(\S+)\)$/$1$encname)/m) { $vplhead =~ s/^(\(CODINGSCHEME .*)\)$/$1 + $encname)/m; } if ($vplhead =~ /^\(DESIGNUNITS R (.+)\)/m) { $scale = $1 } else {$scale = 1 } if ($vplhead =~ /^ \(SLANT R (.+)\)/m) { $slant = $1 } if ($vplhead =~ /^ \(XHEIGHT [DR] (.+)\)/m) { $xheight = $1 } unless ($vplhead =~ /^\(MAPFONT /m) { if ($vplhead =~ /^\(DESIGNSIZE R (.*)\)$/m) { $dsize = $1 } $mapfont = "\n(MAPFONT D 0\n (FONTNAME $fontname)\n (FONTDSIZE R $dsize)\n )"; $vplhead =~ s/\n\(LIGTABLE\Z/$mapfont$&/m; } if ($opt_s) { $vplhead =~ s[^(\(MAPFONT D )0(.*?)( \))] [$&\n${1}1$2 (FONTAT R ${ \($shrink * $scale) })\n$3]ms; } # # Ligatures and kerns # do { $_ = <>; s/ \(comment .*$//i; $ligs .= $_; } until $_ =~ /^ \)/; # # Now build a hash to convert from vpl representation to char name # and use it to make ligtable readable # foreach $i (0 .. 255) { $vc{$nv[$i]} = $enc[$i] } $ligs =~ s/^( \((?:LABEL|KRN) )(\S+ \S+)(.*\))$/$1$vc{$2}$3/gm; $ligs =~ s/^( \(LIG )(\S+ \S+) (\S+ \S+)\)$/$1$vc{$2} $vc{$3})/gm; # # Character definitions: store "encoded" defs in @chars, store *all* # defs in %allchars # $_ = <>; do { if (/^\(CHARACTER/) { $character = $_; do { $_ = <>; $character .= $_; } until $_ =~ /^ \)/; storeinfo($character); $_ = <>; } } until eof; foreach $i (0 .. $#chars) { if ($chars[$i] and $enc[$i] ne ".notdef") { $allchars{$enc[$i]} = $chars[$i]; } } ################## # Set up constants ################## $subacc = "(cedilla|ogonek|commaaccent)"; $supacc = "(grave|acute|circumflex|tilde|macron|breve|dotaccent|overdot|dieresis|ring|hungarumlaut|caron|candrabindu)"; $underacc = "(underdot|under$supacc)"; $accents = "($subacc|$supacc|$underacc|underbar)"; $underadp = 0.230; # depth of "under" accs $underddp = 0.213; # depth of underdot if ($bold) { $thk = 0.072 } else { $thk = 0.052 } # thickness and $underbdp = 0.082 + $thk; # depth of underbar $capheight = $allchars{"X"}{ht}; $accheight = $allchars{"macron"}{ht}; $accdepth = $accheight - $thk * $scale; # probable approx. "depth" of macron $v1 = $accheight - $xheight; # vertical offset for double accents $v2 = $capheight - $xheight; # vertical offset for accented caps etc if ($scaps) { # accented small caps $scoffset = $allchars{"x"}{ht} - $xheight; $v1 += $scoffset; } if ($opt_c) { # candrabindu ($cbx, $cby) = $opt_c =~ /^(.*),(.*)$/; $cbx += ($allchars{"breve"}{wd} - $allchars{"dotaccent"}{wd}) / 2; $cbx /= 1000; $cby /= 1000; } ###################### # Build the characters ###################### # # First normalise dvips.enc encoding quirks # if ($dvips) { chmove("fi", 014); chmove("fl", 015); chmove("quotedblright", 042); chmove("quotedblleft", 0134); chmove("dotaccent", 0137); chmove("endash", 0173); chmove("emdash", 0174); chmove("hungarumlaut", 0175); } # # Now build a list of definitions supplied by user # for (@deflines) { if (/^\s*(\d+|0[0-7]+|0x[0-9a-fA-F]+)\s+([a-zA-Z]+?|\.notdef)(?:\s+([a-zA-Z]+))?$/) { ($num, $char, $acc) = ($1, $2, $3); $num = oct $num if $num =~ /^0/; if ($num > 255) { die "Bad definition (number out of range): $_\n" } $def = {}; $def->{qdef} = $_; $def->{num} = $num; $def->{char} = $char; $def->{acc} = $acc; $def->{nchar} = $char . $acc; push @nchars, $def->{nchar}; push @defs, $def; } else { die "Bad definition: $_\n" } } # # Work through the list # while (@defs) { $def = shift @defs; $qdef = $def->{qdef}; $num = $def->{num}; $char = $def->{char}; $acc = $def->{acc}; $nchar = $def->{nchar}; # # If we can't handle $char/$acc yet, but believe we will be able # to later, send the definition to the back of the queue. In case # it later turns out we were wrong, allow only five loops before # giving up. # if (!$allchars{$char} and $char ne ".notdef") { if (grep /^$char$/, @nchars) { unless (++$def->{requeue} > 5) { push @defs, $def; next; } } else { die "Bad definition (no such character): $qdef\n" } } if ($acc and !$allchars{$acc} and $acc !~ /^$accents$/) { if (grep /^$acc$/, @nchars) { unless (++$def->{requeue} > 5) { push @defs, $def; next; } } else { die "Bad definition (no such accent): $qdef\n" } } # # Remove any existing claims on $num # @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/, @{ $allchars{$chars[$num]{id}}{num} }; # # First deal with .notdef # if ($nchar eq ".notdef") { undef $chars[$num]; } # # Next look among existing chars (unless blocked by -b) # elsif (!($acc and $opt_b) and $allchars{$nchar}) { push( @{ $allchars{$nchar}{num} }, $num); $chars[$num] = $allchars{$nchar}; } # # If it can't be built from sub-elements, issue a warning and move on # elsif (!$acc) { warn "No such character - ignoring definition: $qdef\n"; undef $chars[$num]; } # # Now build the char # else { # # First get rid of predefined/duplicated ligtable statements # and character definitions; also synonyms # $ligs =~ s/\n \((LABEL|KRN|LIG) ($nchar .*|.*$nchar)\)$//gm; $allchars{$nchar} = (); if ($acc eq "overdot") { $nchar2 = $char . "dotaccent"; $ligs =~ s/\n \((LABEL|KRN|LIG) ($nchar2 .*|.*$nchar2)\)$//gm; delete $allchars{$nchar2}; } # # Go! # if ($acc =~ /^$subacc$/) { subacc($num, $char, $acc, $nchar); fixkerns($char, $acc); } elsif ($acc =~ /^$supacc$/) { supacc($num, $char, $acc, $nchar); fixkerns($char, $acc); } elsif ($acc =~ /^$underacc$/) { underacc($num, $char, $acc, $nchar); fixkerns($char, $acc); } elsif ($acc =~ /^underbar$/) { underb($num, $char, $nchar); fixkerns($char, $acc); } else { digraph($num, $char, $acc, $nchar); fixkerns($char, $acc); } } } ################### # Sort out ligtable ################### # # Convert to vpl representation, eliminating statements invoking # "unencoded" characters # @liglist = split /\n/, $ligs; $ligs = ""; foreach (@liglist) { if (/^( \(LIG \S+ )(\S+)\)$/) { if ($n = ${ $allchars{$2}{num} }[0]) { s/^( \(LIG \S+ )(\S+)\)$/$1$nv[$n])/; } else { next } } if (/^( \((?:LABEL|LIG|KRN) )([^ )]+)(.*)$/) { ($one, $two, $three) = ($1, $2, $3); foreach $n (@{ $allchars{$two}{num} }) { $ligs .= "$one$nv[$n]$three\n"; } } else { $ligs .= "$_\n" } } # # Eliminate sequences orphaned by elimination of a LABEL # @liglist = split / \(STOP\)\n/, $ligs; $ligs = ""; foreach (@liglist) { if (/^ \(LABEL /m) { $ligs .= "$_ (STOP)\n" } elsif (/^ \)$/m) { $ligs .= $_ } } # # Eliminate empty statements # $ligs =~ s/(^ \(LABEL .*\)\n)+ \(STOP\)\n//gm; #################### # Output the results #################### # print $vplhead, $ligs; foreach $i (0 .. 255) { if (defined $chars[$i]{id}) { printchar($i) } } ##################### # End of main program ##################### sub storeinfo { # # Extract info from a character definition and store it in @chars # my $char = shift; my $num; if ($char =~ /\A\(CHARACTER O ([0-7]+)/m) { $num = oct $1 } elsif ($char =~ /\A\(CHARACTER C (.)/m) { $num = ord $1 } $chars[$num]{id} = $enc[$num]; push( @{ $chars[$num]{num} }, $num); if ($char =~ /^ \(CHARWD R (.*?)\)$/m) { $chars[$num]{wd} = $1 } if ($char =~ /^ \(CHARHT R (.*?)\)$/m) { $chars[$num]{ht} = $1 } if ($char =~ /^ \(CHARDP R (.*?)\)$/m) { $chars[$num]{dp} = $1 } if ($char =~ /^ \(CHARIC R (.*?)\)$/m) { $chars[$num]{ic} = $1 } if ($char =~ /^ \(MAP\n((.|\n)*)^ \)/m) { $chars[$num]{map} = $1 } else { $chars[$num]{map} = " (SETCHAR $nv[$num])\n" } } sub printchar { # # Extract info from @chars and build it into a character definition # my $num = shift; print "(CHARACTER "; if (chr($num) =~ /[0-9A-Za-z]/) { print "C " . chr($num) } else { printf "O %lo", $num; print " (COMMENT " . $chars[$num]{id} . ")"; } print "\n"; print " (CHARWD R " . $chars[$num]{wd} . ")\n" if $chars[$num]{wd}; print " (CHARHT R " . $chars[$num]{ht} . ")\n" if $chars[$num]{ht}; print " (CHARDP R " . $chars[$num]{dp} . ")\n" if $chars[$num]{dp}; print " (CHARIC R " . $chars[$num]{ic} . ")\n" if $chars[$num]{ic}; print " (MAP\n"; print $chars[$num]{map}; print " )\n"; print " )\n"; } sub chmove { # # Move a character # my ($char, $num) = @_; my $i; foreach $i (@{ $allchars{$char}{num} }) { undef $chars[$i] } @{ $allchars{$char}{num} } = (); @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/, @{ $allchars{$chars[$num]{id}}{num} }; push( @{ $allchars{$char}{num} }, $num); $chars[$num] = $allchars{$char}; } sub max { # # Return greater of two values # my ($a, $b) = @_; return $a > $b ? $a : $b; } sub subacc { # # Subscript accents # my ($num, $char, $acc, $id) = @_; my ($h, $s1, $s2, $s3); $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$char}{ht}; $allchars{$id}{dp} = $allchars{$acc}{dp}; $allchars{$id}{ic} = $allchars{$char}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2); if ($h > 0) { $s2 = " (MOVERIGHT R $h) " } elsif ($h < 0) { $h = -$h; $s2 = " (MOVELEFT R $h) "; } else { $s2 = " " } $s3 = $allchars{$acc}{map}; $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; $allchars{$id}{map} = " $s1\n$s2$s3\n"; $chars[$num] = $allchars{$id}; } sub supacc { # # Superscript accents # my ($num, $char, $acc, $id) = @_; my ($cb, $h, $hadj, $tallchar, $ic, $s1, $s2, $s3); if ($char eq "i" and $allchars{"dotlessi"}) { $char = "dotlessi" } if ($char eq "j" and $allchars{"dotlessj"}) { $char = "dotlessj" } if ($acc eq "overdot") { $acc = "dotaccent" } if ($acc eq "candrabindu") { $acc = "breve"; ($cb = $allchars{"dotaccent"}{map}) =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; } $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$acc}{ht}; $allchars{$id}{dp} = $allchars{$char}{dp}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; if ($scaps and $char =~ /^[a-z]/) { # accented small caps $tallchar = 1; if ($char =~ /$supacc$/ and $char !~ /under$supacc$/) { # double accs $s2 = " (MOVEUP R $v1)"; $allchars{$id}{ht} += $v1; $hadj = sprintf("%.3f", $v1 * $slant); $ic = $allchars{$char}{ic}; } else { # single accs $s2 = " (MOVEUP R $scoffset)"; $allchars{$id}{ht} += $scoffset; $hadj = sprintf("%.3f", $scoffset * $slant); $ic = $allchars{$acc}{ic} + $hadj; } } elsif ($allchars{$char}{ht} >= ($accheight + $v2)) { # double accs $tallchar = 1; # on caps etc. $s2 = " (MOVEUP R ${ \($v1 + $v2) })"; $allchars{$id}{ht} += ($v1 + $v2); $hadj = sprintf("%.3f", ($v1 + $v2) * $slant); $ic = $allchars{$char}{ic}; } elsif ($allchars{$char}{ht} > 1.15 * $xheight) { $tallchar = 1; if ($char =~ /$supacc$/ and $char !~ /under$supacc$/) { # double accs $s2 = " (MOVEUP R $v1)"; $allchars{$id}{ht} += $v1; $hadj = sprintf("%.3f", $v1 * $slant); $ic = $allchars{$char}{ic}; } else { # caps etc. $s2 = " (MOVEUP R $v2)"; $allchars{$id}{ht} += $v2; $hadj = sprintf("%.3f", $v2 * $slant); $ic = $allchars{$char}{ic}; } } else { # single accs $s2 = " "; $ic = $allchars{$acc}{ic}; } $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2); unless ($tallchar) { $ic -= $h } $allchars{$id}{ic} = $ic unless $ic < 0; $h += $hadj; if ($h > 0) { $s2 .= " (MOVERIGHT R $h) " } elsif ($h < 0) { $h = -$h; $s2 .= " (MOVELEFT R $h) "; } else { $s2 .= " " } $s3 = $allchars{$acc}{map}; $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; if ($cb) { # candrabindu $cb = $s2 . $cb; if ($cbx) { unless (($cb =~ s/(MOVERIGHT R )([0-9.]+)/$1 . ($2 + $cbx * $scale)/e) or ($cb =~ s/(MOVELEFT R )([0-9.]+)/$1 . ($2 - $cbx * $scale)/e)) { $cb =~ s/^( +)/"$1(MOVERIGHT R " . ($cbx * $scale) . ") "/e; } } if ($cby) { unless ($cb =~ s/(MOVEUP R )([0-9.]+)/$1 . ($2 + $cby * $scale)/e) { $cb =~ s/^( +)/"$1(MOVEUP R " . ($cby * $scale) . ") "/e; } } $cb .= "\n"; $s1 = "(PUSH) " . $s1; $s3 .= " (POP)"; } $allchars{$id}{map} = " $s1\n$s2$s3\n$cb"; $chars[$num] = $allchars{$id}; } sub underacc { # # Dropped accents # my ($num, $char, $acc, $id) = @_; my ($h, $v, $s1, $s2, $s3); $acc =~ s/^under//; if ($acc eq "dot") { $acc = "period" } if ($acc eq "candrabindu") { die "Bad definition (no such accent): $qdef\n" } $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$char}{ht}; $allchars{$id}{ic} = $allchars{$char}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); if ($acc =~ /^$supacc$/) { $v = $allchars{$id}{dp} = $underadp * $scale * $shrink; $v += ($accdepth * $shrink); } else { $v = $allchars{$id}{dp} = $underddp * $scale + $allchars{$acc}{dp}; } $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; if ($acc =~ /^$supacc$/) { $h = ($allchars{$char}{wd} - ($allchars{$acc}{wd} * $shrink)) / 2 - $v * $slant; if ($opt_s) { $s2 = " (SELECTFONT D 1)\n" } } else { $h = ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2 - $v * $slant; } $h = sprintf("%.3f", $h); if ($h > 0) { $s2 .= " (MOVEDOWN R $v) (MOVERIGHT R $h) "; } elsif ($h < 0) { $h = -$h; $s2 .= " (MOVEDOWN R $v) (MOVELEFT R $h) "; } else { $s2 = " (MOVEDOWN R $v) " } $s3 = $allchars{$acc}{map}; $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; if ($opt_s and $acc =~ /^$supacc$/) { $s3 .= "\n (SELECTFONT D 0)" } $allchars{$id}{map} = " $s1\n$s2$s3\n"; $chars[$num] = $allchars{$id}; } sub underb { # # Underbar # my ($num, $char, $id) = @_; my ($h, $w, $dp, $s1, $s2, $s3); $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$char}{ht}; $allchars{$id}{dp} = $dp = $underbdp * $scale; $allchars{$id}{ic} = $allchars{$char}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; $h = sprintf("%.3f", ($allchars{$id}{wd} / 10 - $dp * $slant)); $w = sprintf("%.3f", ($allchars{$id}{wd} * 8 / 10)); if ($h > 0) { $s2 = " (MOVEDOWN R $dp) (MOVERIGHT R $h) "; } elsif ($h < 0) { $h = -$h; $s2 = " (MOVEDOWN R $dp) (MOVELEFT R $h) "; } else { $s2 .= " (MOVEDOWN R $dp) " } $s3 = "(SETRULE R ${ \($thk * $scale) } R $w)"; $allchars{$id}{map} = " $s1\n$s2$s3\n"; $chars[$num] = $allchars{$id}; } sub digraph { # # Make a new character consisting of two existing characters # my ($num, $char, $acc, $id) = @_; my ($one, $two, $kern, $s1, $s2); if ($ligs =~ /^ \(LABEL $char\)\n(.*?)\n \(KRN $acc R (-?[0-9.]+)\)\n/ms) { ($one, $two) = ($1, $2); } $kern = $two if $one !~ /^ \(STOP\)$/m; $allchars{$id}{wd} = $allchars{$char}{wd} + $allchars{$acc}{wd}; $allchars{$id}{wd} += $kern; $allchars{$id}{ht} = max($allchars{$char}{ht}, $allchars{$acc}{ht}); $allchars{$id}{dp} = max($allchars{$char}{dp}, $allchars{$acc}{dp}); $allchars{$id}{ic} = $allchars{$acc}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; chomp ($s2 = $allchars{$acc}{map}); $s1 =~ s/(\(SETCHAR .*?\))/$1\n$s2/; if ($kern) { if ($kern < 0) { $kern = -$kern; $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVELEFT R $kern)/; } else { $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVERIGHT R $kern)/ } } $allchars{$id}{map} = $s1; $chars[$num] = $allchars{$id}; } sub fixkerns { # # Generalise the kerning info contained in the vpl file by applying # it to new accented chars. Do not kern lower-case chars bearing # superscript accents with capitals, quotes or a preceding "f". # my ($char, $acc) = @_; my ($olabel, $nlabel, @liglist, $lchar, $rchar); if ($acc =~ /^$accents$/) { $lchar = $rchar = $char } else { $lchar = $char; $rchar = $acc; } unless ($char =~ /^[a-z]/ and $acc =~ /^$supacc$/) { $ligs =~ s[(\n \(LABEL )$rchar\)(?!\n \(LIG.*$)] [$&$1$char$acc)]gm unless $ligs =~ /\n \(LABEL $char$acc\)$/m; $ligs =~ s[(\n \(LABEL )$rchar\)(\n \(LIG.*$)+(?!\n \(STOP\))] [$&$1$char$acc)]gm unless $ligs =~ /\n \(LABEL $char$acc\)$/m; $ligs =~ s[(\n \(KRN )$lchar( .*)$] [$&$1$char$acc$2]gm; } else { if ($ligs =~ /\n \(LABEL $char\).*?\(STOP\)/s) { $nlabel = $olabel = $&; $nlabel =~ s/(\n \(LABEL $char)\)/$1$acc)/ unless $ligs =~ /\n \(LABEL $char$acc\)/m; $nlabel =~ s/\n \(LIG .*\)$//gm; $nlabel =~ s/\n \(LABEL (?!$char$acc).*\)$//gm; $nlabel =~ s/\n \(KRN ([A-Z]|quote).*\)$//gm; $ligs =~ s/(\n \(LABEL )$char\).*?\(STOP\)/$olabel$nlabel/s; } @liglist = split /\n \(STOP\)/, $ligs; foreach (@liglist) { unless (/\n \(LABEL ([A-Zf]|quote).*\)$/m) { s/(\n \(KRN $char)( .*\))$/$&$1$acc$2/gm; } } $ligs = join("\n (STOP)", @liglist); } }