#!/usr/bin/perl # Vpl2ovp: a program to generate accented virtual fonts for Omega # Copyright (C) 2008 John D. Smith # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. $version = 0.25; #------------------------------------------------------------------------# $description = "Syntax: vpl2ovp -d definition-file [-s shrink-factor] [-c candrabindu-adjustment] [-b] vpl-file Vpl2ovp creates new Omega virtual fonts based on existing TeX fonts or virtual fonts (\"input fonts\"). A successful run will read a pl (Property List) or vpl (Virtual Property List) file and a definition file, and will generate a new ovp (Omega Virtual Property List) file on standard output. The input font is assumed to adhere to the standard TeX encoding for text fonts unless it was created with either of the programs afm2pl or afm2tfm, in which case it is assumed to conform to (respectively) the Adobe Standard Encoding or the encoding specified in the file dvips.enc. In either case, the name of the input font is assumed to be the name of the input file without its .vpl or .pl extension: it must conform to normal TeX conventions for naming fonts, as vpl2ovp attempts to draw conclusions from it about the kind of font it is dealing with. A typical complete sequence of commands to create a new virtual font might therefore be tftopl cmr10.tfm cmr10.pl vpl2ovp -d Unicode1.def cmr10.pl >cmr10-uni1.ovp ovp2ovf cmr10-uni1.ovp cmr10-uni1.ovf cmr10-uni1.ofm for a Computer Modern font, or afm2pl Times-Roman.afm rptmr.pl pltotf rptmr.pl rptmr.tfm vpl2ovp -d Unicode1.def rptmr.pl >ptmr-uni1.ovp ovp2ovf ptmr-uni1.ovp ptmr-uni1.ovf ptmr-uni1.ofm for a PostScript font. Another approach for a PostScript font is to use afm2tfm: afm2tfm Times-Roman.afm -t dvips.enc -v ptmr rptmr vpl2ovp -d Unicode1.def ptmr.vpl >ptmr-uni1.ovp ovp2ovf ptmr-uni1.ovp ptmr-uni1.ovf ptmr-uni1.ofm -- but this is now deprecated, as afm2tfm generates incorrect values for the heights of some characters, and this can lead to bad accent placing. In order to keep the whole of the character range \"F0-\"FF free for the requirements of the encoding specified in the definition file, certain modifications are made to input fonts following the dvips.enc encoding to bring them into greater conformity with the TeX norm. In particular, the characters dotaccent and hungarumlaut are placed in the positions assigned by TeX (\"5F, \"7D), not those enforced by dvips.enc (\"C7, \"CD). The f-ligatures, double quotes and dashes are also moved from the upper half of the original 8-bit character set to their normal TeX positions. As a result, the following characters are not found in the lower half of the character set: quotesingle, quotedbl, backslash, underscore, braceleft, bar, braceright. These characters can, however, be assigned positions in the output font if they are needed. (Indeed, they could all be explicitly restored to their dvips.enc positions if this were desired.) Options: -d should refer to a font definition file. This file (which could usefully be named, e.g., \"Unicode1.def\") should consist of lines of character definitions, in the form \"number\" \"character\" or \"number\" \"character\" \"accent\" Here \"number\" represents the character's position in the new encoding and may be expressed in decimal, octal or hex; \"character\" names the character (e.g. \"comma\", \"eight\", \"A\") or consists of the word \".notdef\" (indicating that the specified number's \"slot\" in the new encoding is to be empty); and \"accent\" optionally names an accent to be placed on the character. In addition to the standard accents available in PostScript fonts, \"underbar\" and \"underdot\" are also available, as are \"under\" versions of all the normal superscript accents (\"underdieresis\", \"underring\", etc.). The Indian accent \"candrabindu\" may also be specified: it is formed by overprinting a breve with a dotaccent. Finally, \"overdot\" may be used as a synonym for \"dotaccent\". Note that all accents used in defining accented characters must themselves be defined in the .def file. Those which exist in the source font should simply be referenced by name in their appropriate Unicode position (e.g. \"0x0304 macron\"); those which do not should be defined as the character \"space\" followed by the name of the accent (e.g. \"0x0310 space candrabindu\"). If the character named in the \"accent\" position is not in fact a valid accent character, the program interprets the definition as a request for a digraph formed from the \"character\" and the \"accent\". A digraph consisting of, say, \"k\" and \"h\" will be indistinguishable from the letters \"k\" and \"h\" printed consecutively, but the digraph \"kh\" can itself receive accents like any other character: see next paragraph. A new character (such as \"amacron\" or \"kh\") may be freely used in the \"character\" position of a further definition (such as \"amacron breve\" or \"kh underbar\"). There is no constraint on the ordering of definitions within a definition file. The definition of \"a macron\" does not have to precede that of \"amacron breve\": requests for \"impossible\" characters are deferred until their constituents have had a chance to come into being. \"Slots\" for which no new definition is given retain the definition they have in the input font. The definition file may also contain blank lines and comments (introduced by \"\#\"). -s may optionally give the factor, expressed as a per-thousand value, by which normally superscript accents (such as dieresis, ring) should be shrunk when they are used as subscript accents (such as underdieresis, underring). Values of around 800 may be found useful. -c may optionally give two comma-separated numerical values to adjust the x and y coordinates of the dotaccent placed within a breve to form the candrabindu accent. A coordinate scheme using \"DESIGNUNITS R 1000\" is assumed. -b may optionally be specified to block the use of predefined accented characters, forcing vpl2ovp to define its own versions. This may be useful to secure a consistent appearance in cases where a font designer does not share vpl2ovp's views on where accents should be placed. -h prints this help. "; #------------------------------------------------------------------------# ######################## # Packages and constants ######################## # use File::Basename; use Getopt::Std; $cmdline = basename($0) . " " . join " ", @ARGV; getopts('d:s:c:bh'); if ($opt_h or !$opt_d or $#ARGV != 0) { print STDERR $description; exit 1; } $filename = $ARGV[0]; $dummycodepoint = 0xF8FF; ($fontname = $filename) =~ s/\..*$//; ($encname = basename($opt_d)) =~ s/\..*$//; $vtitle = "(VTITLE Font $fontname modified for $encname encoding by vpl2ovp"; $vtitle .= " v. $version" if $version; $vtitle .= ")\n(COMMENT Command line: $cmdline)"; $vtitle .= "\n(OFMLEVEL H 0)"; # # Flags for bold and small caps. These are probably a bit iffy, but # there's not much that can be done about it. # if ($fontname =~ /(^p.*b[oi]?[c]?$|^[^p].*bx[a-z]*[0-9]+$)/) { $bold = 1 } if ($fontname =~ /(^p.*c$|^[^p].*csc[a-z]*[0-9]+$)/) { $scaps = 1 } if ($opt_s) { $shrink = $opt_s / 1000 } else { $shrink = 1 } # # Array to convert from number to vpl representation # foreach $i (0 .. 0xFFFF) { $nv[$i] = (chr($i) =~ /[0-9A-Za-z]/ ? "C " . chr $i : sprintf("H %04lX", $i)); } # # Now the encoding vectors. # @TeXenc = ( "Gamma", "Delta", "Theta", "Lambda", "Xi", "Pi", "Sigma", "Upsilon", "Phi", "Psi", "Omega", "ff", "fi", "fl", "ffi", "ffl", "dotlessi", "dotlessj", "grave", "acute", "caron", "breve", "macron", "ring", "cedilla", "germandbls", "ae", "oe", "oslash", "AE", "OE", "Oslash", "space", "exclam", "quotedblright", "numbersign", "dollar", "percent", "ampersand", "quoteright", "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "exclamdown", "equal", "questiondown", "question", "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", "quotedblleft", "bracketright", "circumflex", "dotaccent", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "endash", "emdash", "hungarumlaut", "tilde", "dieresis" ); @dvipsenc = ( ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "quotesingle", "exclamdown", "questiondown", "dotlessi", "dotlessj", "grave", "acute", "caron", "breve", "macron", "ring", "cedilla", "germandbls", "ae", "oe", "oslash", "AE", "OE", "Oslash", "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quoteright", "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "circumflex", "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar", "braceright", "tilde", "dieresis", "asciicircum", "asciitilde", "Ccedilla", "Iacute", "Icircumflex", "atilde", "edieresis", "egrave", "scaron", "zcaron", "Eth", "ff", "ffi", "ffl", ".notdef", ".notdef", ".notdef", ".notdef", "Scaron", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "Ydieresis", ".notdef", "Zcaron", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "cent", "sterling", "fraction", "yen", "florin", "section", "currency", "copyright", "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl", "degree", "endash", "dagger", "daggerdbl", "periodcentered", ".notdef", "paragraph", "bullet", "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand", ".notdef", ".notdef", "Agrave", "Aacute", "Acircumflex", "Atilde", "Adieresis", "Aring", ".notdef", "dotaccent", "Egrave", "Eacute", "Ecircumflex", "Edieresis", "Igrave", "hungarumlaut", "ogonek", "Idieresis", "emdash", "Ntilde", "Ograve", "Oacute", "Ocircumflex", "Otilde", "Odieresis", ".notdef", ".notdef", "Ugrave", "Uacute", "Ucircumflex", "Udieresis", "Yacute", "Thorn", ".notdef", "agrave", "aacute", "acircumflex", "ordfeminine", "adieresis", "aring", ".notdef", "ccedilla", "Lslash", "eacute", "ecircumflex", "ordmasculine", "igrave", "iacute", "icircumflex", "idieresis", ".notdef", "ntilde", "ograve", "oacute", "ocircumflex", "otilde", "odieresis", ".notdef", "lslash", "ugrave", "uacute", "ucircumflex", "udieresis", "yacute", "thorn", "ydieresis" ); @adobeenc=( ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quoteright", "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "exclamdown", "cent", "sterling", "fraction", "yen", "florin", "section", "currency", "quotesingle", "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl", ".notdef", "endash", "dagger", "daggerdbl", "periodcentered", ".notdef", "paragraph", "bullet", "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand", ".notdef", "questiondown", ".notdef", "grave", "acute", "circumflex", "tilde", "macron", "breve", "dotaccent", "dieresis", ".notdef", "ring", "cedilla", ".notdef", "hungarumlaut", "ogonek", "caron", "emdash", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "AE", ".notdef", "ordfeminine", ".notdef", ".notdef", ".notdef", ".notdef", "Lslash", "Oslash", "OE", "ordmasculine", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", "ae", ".notdef", ".notdef", ".notdef", "dotlessi", ".notdef", ".notdef", "lslash", "oslash", "oe", "germandbls", ".notdef", ".notdef", ".notdef", ".notdef" ); ############### # Read DEF file ############### # open DEF, $opt_d or die "Cannot open $opt_d: $!\n"; while () { next if (/^\s*$/ || /^\#/); s/\s*(\#.*)?$//; push @deflines, $_; } close DEF; ############### # Read VPL file ############### # # File header # $vplhead = <> or exit 1; unless ($vplhead =~ /^\((VTITLE|FAMILY) /) { die "$filename is not a vpl file: giving up\n" } do { $_ = <>; $vplhead .= $_; } until ($_ =~ /^\(LIGTABLE$/ or eof); if (eof) { die "$filename does not seem to be a text font (no LIGTABLE): giving up\n"; } if ($vplhead =~ s/\A\(VTITLE(.*)$/$vtitle\n(COMMENT Old vtitle:$1/m) { $vplhead =~ s/\n\(COMMENT Please edit that VTITLE .*\)$//m; @enc = @dvipsenc; $dvips = 1; } elsif ($vplhead =~ /\A\(FAMILY.*\)\n\(CODINGSCHEME ADOBESTANDARDENCODING\)$/m) { $vplhead =~ s/\A/$vtitle\n/m; @enc = @adobeenc; $dvips = 0; } else { $vplhead =~ s/\A/$vtitle\n/m; @enc = @TeXenc; $dvips = 0; } if ($vplhead =~ /^\(CODINGSCHEME TEX MATH SYMBOLS/m) { die "$filename is a TeX math font: giving up\n"; } unless ($vplhead =~ s/^(\(CODINGSCHEME .*\+\s?)(\S+)\)$/$1$encname)/m) { $vplhead =~ s/^(\(CODINGSCHEME .*)\)$/$1 + $encname)/m; } if ($vplhead =~ s/^\(DESIGNUNITS R (.+)\)\n//m) { $scale = $1 } else { $scale = 1 } $vplhead =~ s/^\(COMMENT.*DESIGNSIZE.*\)\n//gm; $vplhead =~ s/^\(BOUNDARYCHAR.*\)\n//gm; if ($vplhead =~ /^ \(SLANT R (.+)\)/m) { $slant = $1 } if ($vplhead =~ /^ \(XHEIGHT [DR] (.+)\)/m) { $xheight = $1 } unless ($vplhead =~ /^\(MAPFONT /m) { if ($vplhead =~ /^\(DESIGNSIZE R (.*)\)$/m) { $dsize = $1 *$scale } $mapfont = "\n(MAPFONT D 0\n (FONTNAME $fontname)\n (FONTDSIZE R $dsize)\n )"; $vplhead =~ s/\n\(LIGTABLE\Z/$mapfont$&/m; } $vplhead =~ s/^(\(FONTDIMEN)/(SEVENBITSAFEFLAG FALSE)\n$1/m; if ($opt_s) { $vplhead =~ s[^(\(MAPFONT D )0(.*?)( \))] [$&\n${1}1$2 (FONTAT R ${ \($shrink * $scale) })\n$3]ms; } @vplhead = split /^/, $vplhead; foreach (@vplhead) { s[(?; s/ \(comment .*$//i; s/^( \((?:LABEL|KRN) )O ([0-7]+)(.*)$/$1 . sprintf("H %04lX", oct $2) . $3/e; if (/ \(LIG /) { s/O ([0-7]+)/sprintf("H %04lX", oct $1)/ge; } $ligs .= $_; } until $_ =~ /^ \)/; # # Now build a hash to convert from vpl representation to char name # and use it to make ligtable readable # foreach $i (0 .. 255) { $vc{$nv[$i]} = $enc[$i] } $ligs =~ s/^( \((?:LABEL|KRN) )(\S+ \S+)(.*\))$/$1$vc{$2}$3/gm; $ligs =~ s/^( \(LIG )(\S+ \S+) (\S+ \S+)\)$/$1$vc{$2} $vc{$3})/gm; # # Character definitions: store "encoded" defs in @chars, store *all* # defs in %allchars # $_ = <>; do { if (/^\(CHARACTER/) { $character = $_; do { $_ = <>; $character .= $_; } until $_ =~ /^ \)/; storeinfo($character); $_ = <>; } } until eof; foreach $i (0 .. $#chars) { if ($chars[$i] and $enc[$i] ne ".notdef") { $allchars{$enc[$i]} = $chars[$i]; } } ################## # Set up constants ################## $subacc = "(cedilla|ogonek|commaaccent)"; $supacc = "(grave|acute|circumflex|tilde|macron|breve|dotaccent|overdot|dieresis|ring|hungarumlaut|caron|candrabindu)"; $underacc = "(underdot|under$supacc)"; $accents = "($subacc|$supacc|$underacc|underbar)"; $underadp = 0.230; # depth of "under" accs $underddp = 0.213; # depth of underdot if ($bold) { $thk = 0.072 } else { $thk = 0.052 } # thickness and $underbdp = 0.082 + $thk; # depth of underbar $capheight = $allchars{"X"}{ht}; $accheight = $allchars{"macron"}{ht}; $accdepth = $accheight - $thk * $scale; # probable approx. "depth" of macron $v1 = $accheight - $xheight; # vertical offset for double accents $v2 = $capheight - $xheight; # vertical offset for accented caps etc if ($scaps) { # accented small caps $scoffset = $allchars{"x"}{ht} - $xheight; $v1 += $scoffset; } if ($opt_c) { # candrabindu ($cbx, $cby) = $opt_c =~ /^(.*),(.*)$/; $cbx += ($allchars{"breve"}{wd} - $allchars{"dotaccent"}{wd}) / 2; $cbx /= 1000; $cby /= 1000; } ###################### # Build the characters ###################### # # First normalise dvips.enc encoding quirks # if ($dvips) { chmove("fi", 014); chmove("fl", 015); chmove("quotedblright", 042); chmove("quotedblleft", 0134); chmove("dotaccent", 0137); chmove("endash", 0173); chmove("emdash", 0174); chmove("hungarumlaut", 0175); } # # Now build a list of definitions supplied by user # for (@deflines) { if (/^\s*(\d+|0[0-7]+|0x[0-9a-fA-F]+)\s+([a-zA-Z]+?|\.notdef)(?:\s+([a-zA-Z]+))?$/) { ($num, $char, $acc) = ($1, $2, $3); $num = oct $num if $num =~ /^0/; if ($num > 0xFFFF) { die "Bad definition (number out of range): $_\n" } $def = {}; $def->{qdef} = $_; $def->{num} = $num; $def->{char} = $char; $def->{acc} = $acc; if ($char eq "space") { $def->{nchar} = $acc } else { $def->{nchar} = $char . $acc } push @nchars, $def->{nchar}; push @defs, $def; } else { die "Bad definition: $_\n" } } # # Work through the list # while (@defs) { $def = shift @defs; $qdef = $def->{qdef}; $num = $def->{num}; $char = $def->{char}; $acc = $def->{acc}; $nchar = $def->{nchar}; # # If we can't handle $char/$acc yet, but believe we will be able # to later, send the definition to the back of the queue. In case # it later turns out we were wrong, allow only five loops before # giving up. # if (!$allchars{$char} and $char ne ".notdef") { if (grep /^$char$/, @nchars) { unless (++$def->{requeue} > 5) { push @defs, $def; next; } } else { die "Bad definition (no such character): $qdef\n" } } if ($acc and !$allchars{$acc} and $acc !~ /^$accents$/) { if (grep /^$acc$/, @nchars) { unless (++$def->{requeue} > 5) { push @defs, $def; next; } } else { die "Bad definition (no such accent): $qdef\n" } } # # Remove any existing claims on $num # @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/, @{ $allchars{$chars[$num]{id}}{num} }; # # First deal with .notdef # if ($nchar eq ".notdef") { undef $chars[$num]; } # # Next look among existing chars (unless blocked by -b) # elsif (!($acc and $opt_b) and $allchars{$nchar}) { push( @{ $allchars{$nchar}{num} }, $num); $chars[$num] = $allchars{$nchar}; } # # If it can't be built from sub-elements, issue a warning and move on # elsif (!$acc) { warn "No such character - ignoring definition: $qdef\n"; undef $chars[$num]; } # # Now build the char # else { # # First get rid of predefined/duplicated ligtable statements # and character definitions; also synonyms # $ligs =~ s/\n \((LABEL|KRN|LIG) ($nchar .*|.*$nchar)\)$//gm; $allchars{$nchar} = (); if ($acc eq "overdot") { $nchar2 = $char . "dotaccent"; $ligs =~ s/\n \((LABEL|KRN|LIG) ($nchar2 .*|.*$nchar2)\)$//gm; delete $allchars{$nchar2}; } # # Store the info that will be needed to have the character # linked into the LIGTABLE # push @newligs, "$char: (LIG $acc $nchar)"; # # Go! # if ($acc =~ /^$subacc$/) { subacc($num, $char, $acc, $nchar); fixkerns($char, $acc); } elsif ($acc =~ /^$supacc$/) { supacc($num, $char, $acc, $nchar); fixkerns($char, $acc); } elsif ($acc =~ /^$underacc$/) { underacc($num, $char, $acc, $nchar); fixkerns($char, $acc); } elsif ($acc =~ /^underbar$/) { underb($num, $char, $nchar); fixkerns($char, $acc); } else { digraph($num, $char, $acc, $nchar); fixkerns($char, $acc); } } } ################### # Sort out ligtable ################### # # Pull in the newly created ligatures # foreach $newlig (@newligs) { $newlig =~ s/^(.+)://; $newelm1 = $1; unless ($ligs =~ s/(^ \(LABEL $newelm1\)\n)/$1$newlig\n/m) { $ligs =~ s[^( \)\n)] [ \(LABEL $newelm1\)\n$newlig\n \(STOP\)\n$1]m } } # # Convert to vpl representation, eliminating statements invoking # "unencoded" characters # @liglist = split /\n/, $ligs; $ligs = ""; foreach (@liglist) { if (/^( \(LIG \S+ )(\S+)\)$/) { if ($n = ${ $allchars{$2}{num} }[0]) { s/^( \(LIG \S+ )(\S+)\)$/$1$nv[$n])/; } else { next } } if (/^( \((?:LABEL|LIG|KRN) )([^ )]+)(.*)$/) { ($one, $two, $three) = ($1, $2, $3); foreach $n (@{ $allchars{$two}{num} }) { $ligs .= "$one$nv[$n]$three\n"; } } else { $ligs .= "$_\n" } } # # Eliminate sequences orphaned by elimination of a LABEL # @liglist = split / \(STOP\)\n/, $ligs; $ligs = ""; foreach (@liglist) { if (/^ \(LABEL /m) { $ligs .= "$_ (STOP)\n" } elsif (/^ \)$/m) { $ligs .= $_ } } # # Eliminate empty statements # $ligs =~ s/(^ \(LABEL .*\)\n)+ \(STOP\)\n//gm; @ligs = split /^/, $ligs; foreach (@ligs) { s/ [RD] (-?)([0-9.]+)\)/" R " . $1 . $2\/$scale . ")"/ge } #################### # Output the results #################### # print @vplhead, @ligs; foreach $i (0 .. 0xFFFF) { if (defined $chars[$i]{id}) { printchar($i) } } ##################### # End of main program ##################### sub storeinfo { # # Extract info from a character definition and store it in @chars # my $char = shift; my $num; if ($char =~ /\A\(CHARACTER O ([0-7]+)/m) { $num = oct $1 } elsif ($char =~ /\A\(CHARACTER C (.)/m) { $num = ord $1 } $chars[$num]{id} = $enc[$num]; push( @{ $chars[$num]{num} }, $num); if ($char =~ /^ \(CHARWD R (.*?)\)$/m) { $chars[$num]{wd} = $1 } if ($char =~ /^ \(CHARHT R (.*?)\)$/m) { $chars[$num]{ht} = $1 } if ($char =~ /^ \(CHARDP R (.*?)\)$/m) { $chars[$num]{dp} = $1 } if ($char =~ /^ \(CHARIC R (.*?)\)$/m) { $chars[$num]{ic} = $1 } if ($char =~ /^ \(MAP\n((.|\n)*)^ \)/m) { $chars[$num]{map} = $1 } else { $chars[$num]{map} = " (SETCHAR $nv[$num])\n" } $chars[$num]{map} =~ s[(\(SETCHAR )O ([0-7]+)\)] [$1 . sprintf("H %04lX", oct $2) . ")"]gme } sub printchar { # # Extract info from @chars and build it into a character definition # my $num = shift; my @ch; push @ch, "(CHARACTER "; if (chr($num) =~ /[0-9A-Za-z]/) { push @ch, "C " . chr($num) } else { push @ch, sprintf("H %04lX", $num); push @ch, " (COMMENT " . $chars[$num]{id} . ")"; } push @ch, "\n"; push @ch, " (CHARWD R " . $chars[$num]{wd} . ")\n" if defined $chars[$num]{wd}; push @ch, " (CHARHT R " . $chars[$num]{ht} . ")\n" if defined $chars[$num]{ht}; push @ch, " (CHARDP R " . $chars[$num]{dp} . ")\n" if $chars[$num]{dp}; push @ch, " (CHARIC R " . $chars[$num]{ic} . ")\n" if $chars[$num]{ic}; push @ch, " (MAP\n"; push @ch, $chars[$num]{map}; push @ch, " )\n"; push @ch, " )\n"; foreach (@ch) { s/(? $b ? $a : $b; } sub subacc { # # Subscript accents # my ($num, $char, $acc, $id) = @_; my ($h, $s1, $s2, $s3); $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$char}{ht}; $allchars{$id}{dp} = $allchars{$acc}{dp}; $allchars{$id}{ic} = $allchars{$char}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2); if ($h > 0) { $s2 = " (MOVERIGHT R $h) " } elsif ($h < 0) { $h = -$h; $s2 = " (MOVELEFT R $h) "; } else { $s2 = " " } $s3 = $allchars{$acc}{map}; $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; $allchars{$id}{map} = " $s1\n$s2$s3\n"; $chars[$num] = $allchars{$id}; } sub supacc { # # Superscript accents # my ($num, $char, $acc, $id) = @_; my ($cb, $h, $hadj, $tallchar, $ic, $s1, $s2, $s3); if ($char eq "i" and $allchars{"dotlessi"}) { $char = "dotlessi" } if ($char eq "j" and $allchars{"dotlessj"}) { $char = "dotlessj" } if ($acc eq "overdot") { $acc = "dotaccent" } if ($acc eq "candrabindu") { $acc = "breve"; ($cb = $allchars{"dotaccent"}{map}) =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; } $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$acc}{ht}; $allchars{$id}{dp} = $allchars{$char}{dp}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; if ($scaps and $char =~ /^[a-z]/) { # accented small caps $tallchar = 1; if ($char =~ /$supacc$/ and $char !~ /under$supacc$/) { # double accs $s2 = " (MOVEUP R $v1)"; $allchars{$id}{ht} += $v1; $hadj = sprintf("%.3f", $v1 * $slant); $ic = $allchars{$char}{ic}; } else { # single accs $s2 = " (MOVEUP R $scoffset)"; $allchars{$id}{ht} += $scoffset; $hadj = sprintf("%.3f", $scoffset * $slant); $ic = $allchars{$acc}{ic} + $hadj; } } elsif ($allchars{$char}{ht} >= ($accheight + $v2)) { # double accs $tallchar = 1; # on caps etc. $s2 = " (MOVEUP R ${ \($v1 + $v2) })"; $allchars{$id}{ht} += ($v1 + $v2); $hadj = sprintf("%.3f", ($v1 + $v2) * $slant); $ic = $allchars{$char}{ic}; } elsif ($allchars{$char}{ht} > 1.15 * $xheight) { $tallchar = 1; if ($char =~ /$supacc$/ and $char !~ /under$supacc$/) { # double accs $s2 = " (MOVEUP R $v1)"; $allchars{$id}{ht} += $v1; $hadj = sprintf("%.3f", $v1 * $slant); $ic = $allchars{$char}{ic}; } else { # caps etc. $s2 = " (MOVEUP R $v2)"; $allchars{$id}{ht} += $v2; $hadj = sprintf("%.3f", $v2 * $slant); $ic = $allchars{$char}{ic}; } } else { # single accs $s2 = " "; $ic = $allchars{$acc}{ic}; } $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2); unless ($tallchar) { $ic -= $h } $allchars{$id}{ic} = $ic unless $ic < 0; $h += $hadj; if ($h > 0) { $s2 .= " (MOVERIGHT R $h) " } elsif ($h < 0) { $h = -$h; $s2 .= " (MOVELEFT R $h) "; } else { $s2 .= " " } $s3 = $allchars{$acc}{map}; $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; if ($cb) { # candrabindu $cb = $s2 . $cb; if ($cbx) { unless (($cb =~ s/(MOVERIGHT R )([0-9.]+)/$1 . ($2 + $cbx * $scale)/e) or ($cb =~ s/(MOVELEFT R )([0-9.]+)/$1 . ($2 - $cbx * $scale)/e)) { $cb =~ s/^( +)/"$1(MOVERIGHT R " . ($cbx * $scale) . ") "/e; } } if ($cby) { unless ($cb =~ s/(MOVEUP R )([0-9.]+)/$1 . ($2 + $cby * $scale)/e) { $cb =~ s/^( +)/"$1(MOVEUP R " . ($cby * $scale) . ") "/e; } } $cb .= "\n"; $s1 = "(PUSH) " . $s1; $s3 .= " (POP)"; } $allchars{$id}{map} = " $s1\n$s2$s3\n$cb"; $chars[$num] = $allchars{$id}; } sub underacc { # # Dropped accents # my ($num, $char, $acc, $id) = @_; my ($h, $v, $s1, $s2, $s3); $acc =~ s/^under//; if ($acc eq "dot") { $acc = "period" } if ($acc eq "candrabindu") { die "Bad definition (no such accent): $qdef\n" } $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$char}{ht}; $allchars{$id}{ic} = $allchars{$char}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); if ($acc =~ /^$supacc$/) { $v = $allchars{$id}{dp} = $underadp * $scale * $shrink; $v += ($accdepth * $shrink); } else { $v = $allchars{$id}{dp} = $underddp * $scale + $allchars{$acc}{dp}; } $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; if ($acc =~ /^$supacc$/) { $h = ($allchars{$char}{wd} - ($allchars{$acc}{wd} * $shrink)) / 2 - $v * $slant; if ($opt_s) { $s2 = " (SELECTFONT D 1)\n" } } else { $h = ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2 - $v * $slant; } $h = sprintf("%.3f", $h); if ($h > 0) { $s2 .= " (MOVEDOWN R $v) (MOVERIGHT R $h) "; } elsif ($h < 0) { $h = -$h; $s2 .= " (MOVEDOWN R $v) (MOVELEFT R $h) "; } else { $s2 = " (MOVEDOWN R $v) " } $s3 = $allchars{$acc}{map}; $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s; if ($opt_s and $acc =~ /^$supacc$/) { $s3 .= "\n (SELECTFONT D 0)" } $allchars{$id}{map} = " $s1\n$s2$s3\n"; $chars[$num] = $allchars{$id}; } sub underb { # # Underbar # my ($num, $char, $id) = @_; my ($h, $w, $dp, $s1, $s2, $s3); $allchars{$id}{wd} = $allchars{$char}{wd}; $allchars{$id}{ht} = $allchars{$char}{ht}; $allchars{$id}{dp} = $dp = $underbdp * $scale; $allchars{$id}{ic} = $allchars{$char}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; $s1 =~ s/\A (.*)\n\Z/(PUSH) $1 (POP)/s; $h = sprintf("%.3f", ($allchars{$id}{wd} / 10 - $dp * $slant)); $w = sprintf("%.3f", ($allchars{$id}{wd} * 8 / 10)); if ($h > 0) { $s2 = " (MOVEDOWN R $dp) (MOVERIGHT R $h) "; } elsif ($h < 0) { $h = -$h; $s2 = " (MOVEDOWN R $dp) (MOVELEFT R $h) "; } else { $s2 .= " (MOVEDOWN R $dp) " } $s3 = "(SETRULE R ${ \($thk) } R $w)"; $allchars{$id}{map} = " $s1\n$s2$s3\n"; $chars[$num] = $allchars{$id}; # # If the character to be underscored is a digraph, fix up the LIGTABLE # if (grep /^$char$/, @digraphs) { pop @newligs; dummychar(0x035F, "uni035F") unless defined $allchars{uni035F}; $char =~ /^(.)(.)$/; ($one, $two) = ($1, $2); $one_ = $one . "_uni035F"; dummychar($dummycodepoint--, $one_) unless defined $allchars{$one_}; push @newligs, "$one: (LIG uni035F $one_)" unless (grep /^$one: \(LIG uni035F $one_\)$/, @newligs); push @newligs, "$one_: (LIG $two $id)"; } } sub digraph { # # Make a new character consisting of two existing characters # my ($num, $char, $acc, $id) = @_; my ($one, $two, $kern, $s1, $s2, $charZWJ); # # Remove the ligature info -- we don't want every occurrence of # e.g."kh" to be converted into a digraph. # pop @newligs; # # List all digraphs for future reference # push @digraphs, $id; if ($ligs =~ /^ \(LABEL $char\)\n(.*?)\n \(KRN $acc R (-?[0-9.]+)\)\n/ms) { ($one, $two) = ($1, $2); } $kern = $two if $one !~ /^ \(STOP\)$/m; $allchars{$id}{wd} = $allchars{$char}{wd} + $allchars{$acc}{wd}; $allchars{$id}{wd} += $kern; $allchars{$id}{ht} = max($allchars{$char}{ht}, $allchars{$acc}{ht}); $allchars{$id}{dp} = max($allchars{$char}{dp}, $allchars{$acc}{dp}); $allchars{$id}{ic} = $allchars{$acc}{ic}; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $s1 = $allchars{$char}{map}; chomp ($s2 = $allchars{$acc}{map}); $s1 =~ s/(\(SETCHAR .*?\))/$1\n$s2/; if ($kern) { if ($kern < 0) { $kern = -$kern; $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVELEFT R $kern)/; } else { $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVERIGHT R $kern)/ } } $allchars{$id}{map} = $s1; $chars[$num] = $allchars{$id}; } sub dummychar { # # Make a new dummy character # my ($num, $id) = @_; $allchars{$id}{wd} = 0; $allchars{$id}{ht} = 0; $allchars{$id}{dp} = 0; $allchars{$id}{ic} = 0; $allchars{$id}{id} = $id; push( @{ $allchars{$id}{num} }, $num); $allchars{$id}{map} = " (SETCHAR H 0020)\n"; $chars[$num] = $allchars{$id}; } sub fixkerns { # # Generalise the kerning info contained in the vpl file by applying # it to new accented chars. Do not kern lower-case chars bearing # superscript accents with capitals, quotes or a preceding "f". # my ($char, $acc) = @_; my ($olabel, $nlabel, @liglist, $lchar, $rchar); return if $char eq "space"; if ($acc =~ /^$accents$/) { $lchar = $rchar = $char } else { $lchar = $char; $rchar = $acc; } unless ($char =~ /^[a-z]/ and $acc =~ /^$supacc$/) { $ligs =~ s[(\n \(LABEL )$rchar\)(?!\n \(LIG.*$)] [$&$1$char$acc)]gm unless $ligs =~ /\n \(LABEL $char$acc\)$/m; $ligs =~ s[(\n \(LABEL )$rchar\)(\n \(LIG.*$)+(?!\n \(STOP\))] [$&$1$char$acc)]gm unless $ligs =~ /\n \(LABEL $char$acc\)$/m; $ligs =~ s[(\n \(KRN )$lchar( .*)$] [$&$1$char$acc$2]gm; } else { if ($ligs =~ /\n \(LABEL $char\).*?\(STOP\)/s) { $nlabel = $olabel = $&; $nlabel =~ s/(\n \(LABEL $char)\)/$1$acc)/ unless $ligs =~ /\n \(LABEL $char$acc\)/m; $nlabel =~ s/\n \(LIG .*\)$//gm; $nlabel =~ s/\n \(LABEL (?!$char$acc).*\)$//gm; $nlabel =~ s/\n \(KRN ([A-Z]|quote).*\)$//gm; $ligs =~ s/(\n \(LABEL )$char\).*?\(STOP\)/$olabel$nlabel/s; } @liglist = split /\n \(STOP\)/, $ligs; foreach (@liglist) { unless (/\n \(LABEL ([A-Zf]|quote).*\)$/m) { s/(\n \(KRN $char)( .*\))$/$&$1$acc$2/gm; } } $ligs = join("\n (STOP)", @liglist); } }