#!/usr/bin/perl

#   Vpl2ovp: a program to generate accented virtual fonts for Omega
#   Copyright (C) 2008 John D. Smith

#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.

#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.

#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

$version = 0.25;
#------------------------------------------------------------------------#
$description =
"Syntax: vpl2ovp -d definition-file [-s shrink-factor]
           [-c candrabindu-adjustment] [-b] vpl-file

Vpl2ovp creates new Omega virtual fonts based on existing TeX
fonts or virtual fonts (\"input fonts\"). A successful run will
read a pl (Property List) or vpl (Virtual Property List) file
and a definition file, and will generate a new ovp (Omega
Virtual Property List) file on standard output. The input font
is assumed to adhere to the standard TeX encoding for text
fonts unless it was created with either of the programs afm2pl
or afm2tfm, in which case it is assumed to conform to
(respectively) the Adobe Standard Encoding or the encoding
specified in the file dvips.enc. In either case, the name of
the input font is assumed to be the name of the input file
without its .vpl or .pl extension: it must conform to normal
TeX conventions for naming fonts, as vpl2ovp attempts to draw
conclusions from it about the kind of font it is dealing with.

A typical complete sequence of commands to create a new virtual
font might therefore be
     tftopl cmr10.tfm cmr10.pl
     vpl2ovp -d Unicode1.def cmr10.pl >cmr10-uni1.ovp
     ovp2ovf cmr10-uni1.ovp cmr10-uni1.ovf cmr10-uni1.ofm
for a Computer Modern font, or
     afm2pl Times-Roman.afm rptmr.pl
     pltotf rptmr.pl rptmr.tfm
     vpl2ovp -d Unicode1.def rptmr.pl >ptmr-uni1.ovp
     ovp2ovf ptmr-uni1.ovp ptmr-uni1.ovf ptmr-uni1.ofm
for a PostScript font.

Another approach for a PostScript font is to use afm2tfm:
     afm2tfm Times-Roman.afm -t dvips.enc -v ptmr rptmr
     vpl2ovp -d Unicode1.def ptmr.vpl >ptmr-uni1.ovp
     ovp2ovf ptmr-uni1.ovp ptmr-uni1.ovf ptmr-uni1.ofm
-- but this is now deprecated, as afm2tfm generates incorrect
values for the heights of some characters, and this can lead to
bad accent placing.

In order to keep the whole of the character range \"F0-\"FF free for
the requirements of the encoding specified in the definition file,
certain modifications are made to input fonts following the dvips.enc
encoding to bring them into greater conformity with the TeX norm. In
particular, the characters dotaccent and hungarumlaut are placed in
the positions assigned by TeX (\"5F, \"7D), not those enforced by
dvips.enc (\"C7, \"CD). The f-ligatures, double quotes and dashes are
also moved from the upper half of the original 8-bit character set to
their normal TeX positions. As a result, the following characters are
not found in the lower half of the character set: quotesingle,
quotedbl, backslash, underscore, braceleft, bar, braceright. These
characters can, however, be assigned positions in the output font if
they are needed. (Indeed, they could all be explicitly restored to
their dvips.enc positions if this were desired.)

Options:

  -d should refer to a font definition file. This file (which could
     usefully be named, e.g., \"Unicode1.def\") should consist of
     lines of character definitions, in the form
               \"number\"   \"character\"
     or
               \"number\"   \"character\"   \"accent\"
     Here \"number\" represents the character's position in the new
     encoding and may be expressed in decimal, octal or hex;
     \"character\" names the character (e.g. \"comma\", \"eight\",
     \"A\") or consists of the word \".notdef\" (indicating that
     the specified number's \"slot\" in the new encoding is to be
     empty); and \"accent\" optionally names an accent to be placed
     on the character. In addition to the standard accents available
     in PostScript fonts, \"underbar\" and \"underdot\" are also
     available, as are \"under\" versions of all the normal
     superscript accents (\"underdieresis\", \"underring\", etc.).
     The Indian accent \"candrabindu\" may also be specified: it
     is formed by overprinting a breve with a dotaccent. Finally,
     \"overdot\" may be used as a synonym for \"dotaccent\".

     Note that all accents used in defining accented characters must
     themselves be defined in the .def file. Those which exist in the
     source font should simply be referenced by name in their
     appropriate Unicode position (e.g. \"0x0304 macron\"); those
     which do not should be defined as the character \"space\"
     followed by the name of the accent (e.g. \"0x0310 space
     candrabindu\").

     If the character named in the \"accent\" position is not in fact
     a valid accent character, the program interprets the definition
     as a request for a digraph formed from the \"character\" and the
     \"accent\". A digraph consisting of, say, \"k\" and \"h\" will be
     indistinguishable from the letters \"k\" and \"h\" printed
     consecutively, but the digraph \"kh\" can itself receive accents
     like any other character: see next paragraph.

     A new character (such as \"amacron\" or \"kh\") may be freely
     used in the \"character\" position of a further definition (such
     as \"amacron breve\" or \"kh underbar\"). There is no constraint
     on the ordering of definitions within a definition file. The
     definition of \"a macron\" does not have to precede that of
     \"amacron breve\": requests for \"impossible\" characters are
     deferred until their constituents have had a chance to come into
     being.

     \"Slots\" for which no new definition is given retain the
     definition they have in the input font.

     The definition file may also contain blank lines and comments
     (introduced by \"\#\").

  -s may optionally give the factor, expressed as a per-thousand
     value, by which normally superscript accents (such as dieresis,
     ring) should be shrunk when they are used as subscript accents
     (such as underdieresis, underring). Values of around 800 may be
     found useful.

  -c may optionally give two comma-separated numerical values to
     adjust the x and y coordinates of the dotaccent placed within a
     breve to form the candrabindu accent. A coordinate scheme using
     \"DESIGNUNITS R 1000\" is assumed.

  -b may optionally be specified to block the use of predefined
     accented characters, forcing vpl2ovp to define its own
     versions. This may be useful to secure a consistent appearance
     in cases where a font designer does not share vpl2ovp's views
     on where accents should be placed.

  -h prints this help.
";
#------------------------------------------------------------------------#

########################
# Packages and constants
########################
#
use File::Basename;
use Getopt::Std;
$cmdline = basename($0) . " " . join " ", @ARGV;
getopts('d:s:c:bh');
if  ($opt_h or !$opt_d or $#ARGV != 0) {
   print STDERR $description;
   exit 1;
}

$filename = $ARGV[0];
$dummycodepoint = 0xF8FF;
($fontname = $filename) =~ s/\..*$//;
($encname = basename($opt_d)) =~ s/\..*$//;
$vtitle = "(VTITLE Font $fontname modified for $encname encoding by vpl2ovp";
$vtitle .= " v. $version" if $version;
$vtitle .= ")\n(COMMENT Command line: $cmdline)";
$vtitle .= "\n(OFMLEVEL H 0)";

#
# Flags for bold and small caps. These are probably a bit iffy, but
# there's not much that can be done about it.
#
if ($fontname =~ /(^p.*b[oi]?[c]?$|^[^p].*bx[a-z]*[0-9]+$)/) { $bold = 1 }
if ($fontname =~ /(^p.*c$|^[^p].*csc[a-z]*[0-9]+$)/)         { $scaps = 1 }
if ($opt_s) { $shrink = $opt_s / 1000 } else { $shrink = 1 }

#
# Array to convert from number to vpl representation
#
foreach $i (0 .. 0xFFFF) {
   $nv[$i] = (chr($i) =~ /[0-9A-Za-z]/ ? "C " . chr $i : sprintf("H %04lX", $i));
}

#
# Now the encoding vectors.
#
@TeXenc = (
   "Gamma",          "Delta",          "Theta",          "Lambda",
   "Xi",             "Pi",             "Sigma",          "Upsilon",
   "Phi",            "Psi",            "Omega",          "ff",
   "fi",             "fl",             "ffi",            "ffl",
   "dotlessi",       "dotlessj",       "grave",          "acute",
   "caron",          "breve",          "macron",         "ring",
   "cedilla",        "germandbls",     "ae",             "oe",
   "oslash",         "AE",             "OE",             "Oslash",
   "space",          "exclam",         "quotedblright",  "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "exclamdown",     "equal",          "questiondown",   "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "quotedblleft",   "bracketright",   "circumflex",     "dotaccent",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "endash",
   "emdash",         "hungarumlaut",   "tilde",          "dieresis"
);

@dvipsenc = (
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "quotesingle",    "exclamdown",     "questiondown",
   "dotlessi",       "dotlessj",       "grave",          "acute",
   "caron",          "breve",          "macron",         "ring",
   "cedilla",        "germandbls",     "ae",             "oe",
   "oslash",         "AE",             "OE",             "Oslash",
   "space",          "exclam",         "quotedbl",       "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "less",           "equal",          "greater",        "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "backslash",      "bracketright",   "circumflex",     "underscore",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "braceleft",
   "bar",            "braceright",     "tilde",          "dieresis",
   "asciicircum",    "asciitilde",     "Ccedilla",       "Iacute",
   "Icircumflex",    "atilde",         "edieresis",      "egrave",
   "scaron",         "zcaron",         "Eth",            "ff",
   "ffi",            "ffl",            ".notdef",        ".notdef",
   ".notdef",        ".notdef",        "Scaron",         ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   "Ydieresis",      ".notdef",        "Zcaron",         ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        "cent",           "sterling",
   "fraction",       "yen",            "florin",         "section",
   "currency",       "copyright",      "quotedblleft",   "guillemotleft",
   "guilsinglleft",  "guilsinglright", "fi",             "fl",
   "degree",         "endash",         "dagger",         "daggerdbl",
   "periodcentered", ".notdef",        "paragraph",      "bullet",
   "quotesinglbase", "quotedblbase",   "quotedblright",  "guillemotright",
   "ellipsis",       "perthousand",    ".notdef",        ".notdef",
   "Agrave",         "Aacute",         "Acircumflex",    "Atilde",
   "Adieresis",      "Aring",          ".notdef",        "dotaccent",
   "Egrave",         "Eacute",         "Ecircumflex",    "Edieresis",
   "Igrave",         "hungarumlaut",   "ogonek",         "Idieresis",
   "emdash",         "Ntilde",         "Ograve",         "Oacute",
   "Ocircumflex",    "Otilde",         "Odieresis",      ".notdef",
   ".notdef",        "Ugrave",         "Uacute",         "Ucircumflex",
   "Udieresis",      "Yacute",         "Thorn",          ".notdef",
   "agrave",         "aacute",         "acircumflex",    "ordfeminine",
   "adieresis",      "aring",          ".notdef",        "ccedilla",
   "Lslash",         "eacute",         "ecircumflex",    "ordmasculine",
   "igrave",         "iacute",         "icircumflex",    "idieresis",
   ".notdef",        "ntilde",         "ograve",         "oacute",
   "ocircumflex",    "otilde",         "odieresis",      ".notdef",
   "lslash",         "ugrave",         "uacute",         "ucircumflex",
   "udieresis",      "yacute",         "thorn",          "ydieresis"
);

@adobeenc=(
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   "space",          "exclam",         "quotedbl",       "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "less",           "equal",          "greater",        "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "backslash",      "bracketright",   "asciicircum",    "underscore",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "braceleft",
   "bar",            "braceright",     "asciitilde",     ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "exclamdown",     "cent",           "sterling",
   "fraction",       "yen",            "florin",         "section",
   "currency",       "quotesingle",    "quotedblleft",   "guillemotleft",
   "guilsinglleft",  "guilsinglright", "fi",             "fl",
   ".notdef",        "endash",         "dagger",         "daggerdbl",
   "periodcentered", ".notdef",        "paragraph",      "bullet",
   "quotesinglbase", "quotedblbase",   "quotedblright",  "guillemotright",
   "ellipsis",       "perthousand",    ".notdef",        "questiondown",
   ".notdef",        "grave",          "acute",          "circumflex",
   "tilde",          "macron",         "breve",          "dotaccent",
   "dieresis",        ".notdef",       "ring",           "cedilla",
   ".notdef",        "hungarumlaut",   "ogonek",         "caron",
   "emdash",         ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "AE",             ".notdef",        "ordfeminine",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   "Lslash",         "Oslash",         "OE",             "ordmasculine",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "ae",             ".notdef",        ".notdef",
   ".notdef",        "dotlessi",       ".notdef",        ".notdef",
   "lslash",         "oslash",         "oe",             "germandbls",
   ".notdef",        ".notdef",        ".notdef",        ".notdef"
);

###############
# Read DEF file
###############
#
open DEF, $opt_d or die "Cannot open $opt_d: $!\n";
while (<DEF>) {
   next if (/^\s*$/ || /^\#/);
   s/\s*(\#.*)?$//;
   push @deflines, $_;
}
close DEF;

###############
# Read VPL file
###############
#
# File header
#
$vplhead = <> or exit 1;
unless ($vplhead =~ /^\((VTITLE|FAMILY) /) {
   die "$filename is not a vpl file: giving up\n"
}
do {
   $_ = <>;
   $vplhead .= $_;
} until ($_ =~ /^\(LIGTABLE$/ or eof);
if (eof) {
   die "$filename does not seem to be a text font (no LIGTABLE): giving up\n";
}
if ($vplhead =~ s/\A\(VTITLE(.*)$/$vtitle\n(COMMENT Old vtitle:$1/m) {
   $vplhead =~ s/\n\(COMMENT Please edit that VTITLE .*\)$//m;
   @enc = @dvipsenc;
   $dvips = 1;
}
elsif ($vplhead =~ /\A\(FAMILY.*\)\n\(CODINGSCHEME ADOBESTANDARDENCODING\)$/m) {
   $vplhead =~ s/\A/$vtitle\n/m;
   @enc = @adobeenc;
   $dvips = 0;
}
else {
   $vplhead =~ s/\A/$vtitle\n/m;
   @enc = @TeXenc;
   $dvips = 0;
}
if ($vplhead =~ /^\(CODINGSCHEME TEX MATH SYMBOLS/m) {
   die "$filename is a TeX math font: giving up\n";
}
unless ($vplhead =~ s/^(\(CODINGSCHEME .*\+\s?)(\S+)\)$/$1$encname)/m) {
   $vplhead =~ s/^(\(CODINGSCHEME .*)\)$/$1 + $encname)/m;
}
if ($vplhead =~ s/^\(DESIGNUNITS R (.+)\)\n//m) { $scale = $1 }
else { $scale = 1 }
$vplhead =~ s/^\(COMMENT.*DESIGNSIZE.*\)\n//gm;
$vplhead =~ s/^\(BOUNDARYCHAR.*\)\n//gm;
if ($vplhead =~ /^   \(SLANT R (.+)\)/m)       { $slant = $1 }
if ($vplhead =~ /^   \(XHEIGHT [DR] (.+)\)/m)  { $xheight = $1 }
unless ($vplhead =~ /^\(MAPFONT /m) {
   if ($vplhead =~ /^\(DESIGNSIZE R (.*)\)$/m) { $dsize = $1 *$scale }
   $mapfont = "\n(MAPFONT D 0\n   (FONTNAME $fontname)\n   (FONTDSIZE R $dsize)\n   )";
   $vplhead =~ s/\n\(LIGTABLE\Z/$mapfont$&/m;
}
$vplhead =~ s/^(\(FONTDIMEN)/(SEVENBITSAFEFLAG FALSE)\n$1/m;
if ($opt_s) {
   $vplhead =~ s[^(\(MAPFONT D )0(.*?)(   \))]
                [$&\n${1}1$2   (FONTAT R ${ \($shrink * $scale) })\n$3]ms;
}
@vplhead = split /^/, $vplhead;
foreach (@vplhead) {
   s[(?<!(IGNSIZE|MAPFONT)) [RD] (-?)([0-9.]+)\)]
    [" R " . $2 . $3/$scale . ")"]ge
}

#
# Ligatures and kerns
#
do {
   $_ = <>;
   s/ \(comment .*$//i;
   s/^(   \((?:LABEL|KRN) )O ([0-7]+)(.*)$/$1 . sprintf("H %04lX", oct $2) . $3/e;
if (/   \(LIG /) {
    s/O ([0-7]+)/sprintf("H %04lX", oct $1)/ge;
   }
   $ligs .= $_;
} until $_ =~ /^   \)/;
#
# Now build a hash to convert from vpl representation to char name
# and use it to make ligtable readable
#
foreach $i (0 .. 255) { $vc{$nv[$i]} = $enc[$i] }
$ligs =~ s/^(   \((?:LABEL|KRN) )(\S+ \S+)(.*\))$/$1$vc{$2}$3/gm;
$ligs =~ s/^(   \(LIG )(\S+ \S+) (\S+ \S+)\)$/$1$vc{$2} $vc{$3})/gm;
#
# Character definitions: store "encoded" defs in @chars, store *all*
# defs in %allchars
#
$_ = <>;
do {
   if (/^\(CHARACTER/) {
      $character = $_;
      do {
	 $_ = <>;
	 $character .= $_;
      } until $_ =~ /^   \)/;
      storeinfo($character);
      $_ = <>;
   }
} until eof;
foreach $i (0 .. $#chars) {
   if ($chars[$i] and $enc[$i] ne ".notdef") {
      $allchars{$enc[$i]} = $chars[$i];
   }
}

##################
# Set up constants
##################

$subacc = "(cedilla|ogonek|commaaccent)";
$supacc = "(grave|acute|circumflex|tilde|macron|breve|dotaccent|overdot|dieresis|ring|hungarumlaut|caron|candrabindu)";
$underacc = "(underdot|under$supacc)";
$accents = "($subacc|$supacc|$underacc|underbar)";
$underadp = 0.230;				# depth of "under" accs
$underddp = 0.213;				# depth of underdot
if ($bold) { $thk = 0.072 } else { $thk = 0.052 }	# thickness and
$underbdp = 0.082 + $thk;				# depth of underbar
$capheight = $allchars{"X"}{ht};
$accheight = $allchars{"macron"}{ht};
$accdepth = $accheight - $thk * $scale;	# probable approx. "depth" of macron
$v1 = $accheight - $xheight;		# vertical offset for double accents
$v2 = $capheight - $xheight;		# vertical offset for accented caps etc
if ($scaps)  {						# accented small caps
    $scoffset = $allchars{"x"}{ht} - $xheight;
    $v1 += $scoffset;
}
if ($opt_c) {						# candrabindu
   ($cbx, $cby) = $opt_c =~ /^(.*),(.*)$/;
   $cbx += ($allchars{"breve"}{wd} - $allchars{"dotaccent"}{wd}) / 2;
   $cbx /= 1000;
   $cby /= 1000;
}

######################
# Build the characters
######################
#
# First normalise dvips.enc encoding quirks
#
if ($dvips) {
   chmove("fi", 014);
   chmove("fl", 015);
   chmove("quotedblright", 042);
   chmove("quotedblleft", 0134);
   chmove("dotaccent", 0137);
   chmove("endash", 0173);
   chmove("emdash", 0174);
   chmove("hungarumlaut", 0175);
}
#
# Now build a list of definitions supplied by user
#
for (@deflines) {
   if (/^\s*(\d+|0[0-7]+|0x[0-9a-fA-F]+)\s+([a-zA-Z]+?|\.notdef)(?:\s+([a-zA-Z]+))?$/) {
      ($num, $char, $acc)  = ($1, $2, $3);
      $num = oct $num if $num =~ /^0/;
      if ($num > 0xFFFF) { die "Bad definition (number out of range): $_\n" }
      $def = {};
      $def->{qdef}  = $_;
      $def->{num}   = $num;
      $def->{char}  = $char;
      $def->{acc}   = $acc;
      if ($char eq "space") { $def->{nchar} = $acc }
      else { $def->{nchar} = $char . $acc }
      push @nchars, $def->{nchar};
      push @defs,   $def;
   }
   else { die "Bad definition: $_\n" }
}
#
# Work through the list
#
while (@defs) {
   $def  = shift @defs;
   $qdef  = $def->{qdef};
   $num   = $def->{num};
   $char  = $def->{char};
   $acc   = $def->{acc};
   $nchar = $def->{nchar};
   #
   # If we can't handle $char/$acc yet, but believe we will be able
   # to later, send the definition to the back of the queue. In case
   # it later turns out we were wrong, allow only five loops before
   # giving up.
   #
   if (!$allchars{$char} and $char ne ".notdef") {
      if (grep /^$char$/, @nchars) {
	 unless (++$def->{requeue} > 5) {
	    push @defs, $def;
	    next;
	 }
      }
      else { die "Bad definition (no such character): $qdef\n" }
   }
   if ($acc and !$allchars{$acc} and $acc !~ /^$accents$/) {
      if (grep /^$acc$/, @nchars) {
	 unless (++$def->{requeue} > 5) {
	    push @defs, $def;
	    next;
	 }
      }
      else { die "Bad definition (no such accent): $qdef\n" }
   }
   #
   # Remove any existing claims on $num
   #
   @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/,
     @{ $allchars{$chars[$num]{id}}{num} };
   #
   # First deal with .notdef
   #
   if ($nchar eq ".notdef") {
      undef $chars[$num];
   }
   #
   # Next look among existing chars (unless blocked by -b)
   #
   elsif (!($acc and $opt_b) and $allchars{$nchar}) {
      push( @{ $allchars{$nchar}{num} }, $num);
      $chars[$num] = $allchars{$nchar};
   }
   #
   # If it can't be built from sub-elements, issue a warning and move on
   #
   elsif (!$acc) {
      warn "No such character - ignoring definition: $qdef\n";
      undef $chars[$num];
   }
   #
   # Now build the char
   #
   else {
      #
      # First get rid of predefined/duplicated ligtable statements
      # and character definitions; also synonyms
      #
      $ligs =~ s/\n   \((LABEL|KRN|LIG) ($nchar .*|.*$nchar)\)$//gm;
      $allchars{$nchar} = ();
      if ($acc eq "overdot") {
         $nchar2 = $char . "dotaccent";
	 $ligs =~ s/\n   \((LABEL|KRN|LIG) ($nchar2 .*|.*$nchar2)\)$//gm;
	 delete $allchars{$nchar2};
      }
      #
      # Store the info that will be needed to have the character
      # linked into the LIGTABLE
      #
      push @newligs, "$char:   (LIG $acc $nchar)";
      #
      # Go!
      #
      if ($acc =~ /^$subacc$/) {
	 subacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^$supacc$/) {
	 supacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^$underacc$/) {
	 underacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^underbar$/) {
	 underb($num, $char, $nchar);
	 fixkerns($char, $acc);
      }
      else {
	 digraph($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
   }
}

###################
# Sort out ligtable
###################
#
# Pull in the newly created ligatures
#
foreach $newlig (@newligs) {
   $newlig =~ s/^(.+)://;
   $newelm1 = $1;
   unless ($ligs =~ s/(^   \(LABEL $newelm1\)\n)/$1$newlig\n/m) {
       $ligs =~ s[^(   \)\n)]
                 [   \(LABEL $newelm1\)\n$newlig\n   \(STOP\)\n$1]m
   }
}
#
# Convert to vpl representation, eliminating statements invoking
# "unencoded" characters
#
@liglist = split /\n/, $ligs;
$ligs = "";
foreach (@liglist) {
   if (/^(   \(LIG \S+ )(\S+)\)$/) {
      if ($n = ${ $allchars{$2}{num} }[0]) {
	 s/^(   \(LIG \S+ )(\S+)\)$/$1$nv[$n])/;
      }
      else { next }
   }
   if (/^(   \((?:LABEL|LIG|KRN) )([^ )]+)(.*)$/) {
      ($one, $two, $three) = ($1, $2, $3);
      foreach $n (@{ $allchars{$two}{num} }) {
	 $ligs .= "$one$nv[$n]$three\n";
      }
   }
   else { $ligs .= "$_\n" }
}
#
# Eliminate sequences orphaned by elimination of a LABEL
#
@liglist = split /   \(STOP\)\n/, $ligs;
$ligs = "";
foreach (@liglist) {
   if (/^   \(LABEL /m) { $ligs .= "$_   (STOP)\n" }
   elsif (/^   \)$/m) { $ligs .= $_ }
}
#
# Eliminate empty statements
#
$ligs =~ s/(^   \(LABEL .*\)\n)+   \(STOP\)\n//gm;
@ligs = split /^/, $ligs;
foreach (@ligs) {
   s/ [RD] (-?)([0-9.]+)\)/" R " . $1 . $2\/$scale . ")"/ge
}

####################
# Output the results
####################
#
print @vplhead, @ligs;
foreach $i (0 .. 0xFFFF) { if (defined $chars[$i]{id}) { printchar($i) } }

#####################
# End of main program
#####################

sub storeinfo {
   #
   # Extract info from a character definition and store it in @chars
   #
   my $char = shift;
   my $num;
   if ($char =~ /\A\(CHARACTER O ([0-7]+)/m) { $num = oct $1 }
   elsif ($char =~ /\A\(CHARACTER C (.)/m) { $num = ord $1 }
   $chars[$num]{id} = $enc[$num];
   push( @{ $chars[$num]{num} }, $num);
   if ($char =~ /^   \(CHARWD R (.*?)\)$/m) { $chars[$num]{wd} = $1 }
   if ($char =~ /^   \(CHARHT R (.*?)\)$/m) { $chars[$num]{ht} = $1 }
   if ($char =~ /^   \(CHARDP R (.*?)\)$/m) { $chars[$num]{dp} = $1 }
   if ($char =~ /^   \(CHARIC R (.*?)\)$/m) { $chars[$num]{ic} = $1 }
   if ($char =~ /^   \(MAP\n((.|\n)*)^      \)/m) { $chars[$num]{map} = $1 }
   else { $chars[$num]{map} = "      (SETCHAR $nv[$num])\n" }
   $chars[$num]{map} =~ s[(\(SETCHAR )O ([0-7]+)\)]
         [$1 . sprintf("H %04lX", oct $2) . ")"]gme
}

sub printchar {
   #
   # Extract info from @chars and build it into a character definition
   #
   my $num = shift;
   my @ch;
   push @ch, "(CHARACTER ";
   if (chr($num) =~ /[0-9A-Za-z]/) { push @ch, "C " . chr($num) }
   else {
      push @ch, sprintf("H %04lX", $num);
      push @ch, " (COMMENT " . $chars[$num]{id} . ")";
   }
   push @ch, "\n";
   push @ch, "   (CHARWD R " . $chars[$num]{wd} . ")\n"
      if defined $chars[$num]{wd};
   push @ch, "   (CHARHT R " . $chars[$num]{ht} . ")\n"
      if defined $chars[$num]{ht};
   push @ch, "   (CHARDP R " . $chars[$num]{dp} . ")\n" if $chars[$num]{dp};
   push @ch, "   (CHARIC R " . $chars[$num]{ic} . ")\n" if $chars[$num]{ic};
   push @ch, "   (MAP\n";
   push @ch, $chars[$num]{map};
   push @ch, "      )\n";
   push @ch, "   )\n";
   foreach (@ch) {
      s/(?<!SELECTFONT) [RD] (-?)([0-9.]+)\)/" R " . $1 . $2\/$scale . ")"/ge
   }
   print @ch
}

sub chmove {
   #
   # Move a character
   #
   my ($char, $num) = @_;
   my $i;
   foreach $i (@{ $allchars{$char}{num} }) { undef $chars[$i] }
   @{ $allchars{$char}{num} } = ();
   @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/,
     @{ $allchars{$chars[$num]{id}}{num} };
   push( @{ $allchars{$char}{num} }, $num);
   $chars[$num] = $allchars{$char};
}

sub max {
   #
   # Return greater of two values
   #
   my ($a, $b) = @_;
   return $a > $b ? $a : $b;
}

sub subacc {
   #
   # Subscript accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($h, $s1, $s2, $s3);
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{dp} = $allchars{$acc}{dp};
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2);
   if ($h > 0) { $s2 = "      (MOVERIGHT R $h) " }
   elsif ($h < 0) {
      $h = -$h;
      $s2 = "      (MOVELEFT R $h) ";
   }
   else { $s2 = "      " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub supacc {
   #
   # Superscript accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($cb, $h, $hadj, $tallchar, $ic, $s1, $s2, $s3);
   if ($char eq "i" and $allchars{"dotlessi"}) { $char = "dotlessi" }
   if ($char eq "j" and $allchars{"dotlessj"}) { $char = "dotlessj" }
   if ($acc eq "overdot") { $acc = "dotaccent" }
   if ($acc eq "candrabindu") {
      $acc = "breve";
      ($cb = $allchars{"dotaccent"}{map}) =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   }
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$acc}{ht};
   $allchars{$id}{dp} = $allchars{$char}{dp};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   if ($scaps and $char =~ /^[a-z]/) {			# accented small caps
      $tallchar = 1;
      if ($char =~ /$supacc$/
	   and $char !~ /under$supacc$/) {		# double accs
	 $s2 = "      (MOVEUP R $v1)";
	 $allchars{$id}{ht} += $v1;
	 $hadj = sprintf("%.3f", $v1 * $slant);
	 $ic = $allchars{$char}{ic};
      }
      else {						# single accs
	 $s2 = "      (MOVEUP R $scoffset)";
	 $allchars{$id}{ht} += $scoffset;
	 $hadj = sprintf("%.3f", $scoffset * $slant);
	 $ic = $allchars{$acc}{ic} + $hadj;
      }
   }
   elsif ($allchars{$char}{ht} >= ($accheight + $v2)) {	# double accs
      $tallchar = 1;					# on caps etc.
      $s2 = "      (MOVEUP R ${ \($v1 + $v2) })";
      $allchars{$id}{ht} += ($v1 + $v2);
      $hadj = sprintf("%.3f", ($v1 + $v2) * $slant);
      $ic = $allchars{$char}{ic};
   }
   elsif ($allchars{$char}{ht} > 1.15 * $xheight) {
      $tallchar = 1;
      if ($char =~ /$supacc$/
	   and $char !~ /under$supacc$/) {		# double accs
	 $s2 = "      (MOVEUP R $v1)";
	 $allchars{$id}{ht} += $v1;
	 $hadj = sprintf("%.3f", $v1 * $slant);
	 $ic = $allchars{$char}{ic};
      }
      else {						# caps etc.
	 $s2 = "      (MOVEUP R $v2)";
	 $allchars{$id}{ht} += $v2;
	 $hadj = sprintf("%.3f", $v2 * $slant);
	 $ic = $allchars{$char}{ic};
      }
   }
   else {						# single accs
      $s2 = "     ";
      $ic = $allchars{$acc}{ic};
   }
   $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2);
   unless ($tallchar) { $ic -= $h }
   $allchars{$id}{ic} = $ic unless $ic < 0;
   $h += $hadj;
   if ($h > 0) { $s2 .= " (MOVERIGHT R $h) " }
   elsif ($h < 0)  {
      $h = -$h;
      $s2 .= " (MOVELEFT R $h) ";
   }
   else { $s2 .= " " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   if ($cb) {						# candrabindu
      $cb = $s2 . $cb;
      if ($cbx) {
	 unless (($cb =~ s/(MOVERIGHT R )([0-9.]+)/$1 . ($2 + $cbx * $scale)/e)
	   or ($cb =~ s/(MOVELEFT R )([0-9.]+)/$1 . ($2 - $cbx * $scale)/e)) {
	      $cb =~ s/^( +)/"$1(MOVERIGHT R " . ($cbx * $scale) . ") "/e;
	   }
      }
      if ($cby) {
	 unless ($cb =~ s/(MOVEUP R )([0-9.]+)/$1 . ($2 + $cby * $scale)/e) {
	    $cb =~ s/^( +)/"$1(MOVEUP R " . ($cby * $scale) . ") "/e;
	 }
      }
      $cb .= "\n";
      $s1 = "(PUSH) " . $s1;
      $s3 .= " (POP)";
   }
   $allchars{$id}{map} = "      $s1\n$s2$s3\n$cb";
   $chars[$num] = $allchars{$id};
}

sub underacc {
   #
   # Dropped accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($h, $v, $s1, $s2, $s3);
   $acc =~ s/^under//;
   if ($acc eq "dot") { $acc = "period" }
   if ($acc eq "candrabindu") { die "Bad definition (no such accent): $qdef\n" }
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   if ($acc =~ /^$supacc$/) {
      $v = $allchars{$id}{dp} = $underadp * $scale * $shrink;
      $v += ($accdepth * $shrink);
   }
   else {
      $v = $allchars{$id}{dp} = $underddp * $scale + $allchars{$acc}{dp};
   }
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   if ($acc =~ /^$supacc$/) {
      $h = ($allchars{$char}{wd} - ($allchars{$acc}{wd} * $shrink)) / 2 - $v * $slant;
      if ($opt_s) { $s2 = "      (SELECTFONT D 1)\n" }
   }
   else {
      $h = ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2 - $v * $slant;
   }
   $h = sprintf("%.3f", $h);
   if ($h > 0) {
      $s2 .= "      (MOVEDOWN R $v) (MOVERIGHT R $h) ";
   }
   elsif ($h < 0) {
      $h = -$h;
      $s2 .= "      (MOVEDOWN R $v) (MOVELEFT R $h) ";
   }
   else  { $s2 = "      (MOVEDOWN R $v) " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   if ($opt_s and $acc =~ /^$supacc$/) { $s3 .= "\n      (SELECTFONT D 0)" }
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub underb {
   #
   # Underbar
   #
   my ($num, $char, $id) = @_;
   my ($h, $w, $dp, $s1, $s2, $s3);
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{dp} = $dp = $underbdp * $scale;
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   $h = sprintf("%.3f", ($allchars{$id}{wd} / 10 - $dp * $slant));
   $w = sprintf("%.3f", ($allchars{$id}{wd} * 8 / 10));
   if ($h > 0) {
      $s2 = "      (MOVEDOWN R $dp) (MOVERIGHT R $h) ";
   }
   elsif ($h < 0) {
      $h = -$h;
      $s2 = "      (MOVEDOWN R $dp) (MOVELEFT R $h) ";
   }
   else { $s2 .= "      (MOVEDOWN R $dp) " }
   $s3 = "(SETRULE R ${ \($thk) } R $w)";
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
   #
   # If the character to be underscored is a digraph, fix up the LIGTABLE
   #
   if (grep /^$char$/, @digraphs) {
       pop @newligs;
       dummychar(0x035F, "uni035F") unless defined $allchars{uni035F};
       $char =~ /^(.)(.)$/;
       ($one, $two) = ($1, $2);
       $one_ = $one . "_uni035F";
       dummychar($dummycodepoint--, $one_) unless defined $allchars{$one_};
       push @newligs, "$one:   (LIG uni035F $one_)"
	  unless (grep /^$one:   \(LIG uni035F $one_\)$/, @newligs);
       push @newligs, "$one_:   (LIG $two $id)";
   }
}

sub digraph {
   #
   # Make a new character consisting of two existing characters
   #
   my ($num, $char, $acc, $id) = @_;
   my ($one, $two, $kern, $s1, $s2, $charZWJ);
   #
   # Remove the ligature info -- we don't want every occurrence of
   # e.g."kh" to be converted into a digraph.
   #
   pop @newligs;
   #
   # List all digraphs for future reference
   #
   push @digraphs, $id;
   if ($ligs =~ /^   \(LABEL $char\)\n(.*?)\n   \(KRN $acc R (-?[0-9.]+)\)\n/ms) {
      ($one, $two) = ($1, $2);
   }
   $kern = $two if $one !~ /^   \(STOP\)$/m;
   $allchars{$id}{wd} = $allchars{$char}{wd} + $allchars{$acc}{wd};
   $allchars{$id}{wd} += $kern;
   $allchars{$id}{ht} = max($allchars{$char}{ht}, $allchars{$acc}{ht});
   $allchars{$id}{dp} = max($allchars{$char}{dp}, $allchars{$acc}{dp});
   $allchars{$id}{ic} = $allchars{$acc}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   chomp ($s2 = $allchars{$acc}{map});
   $s1 =~ s/(\(SETCHAR .*?\))/$1\n$s2/;
   if ($kern) {
      if ($kern < 0) {
	 $kern = -$kern;
	 $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVELEFT R $kern)/;
      }
      else { $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVERIGHT R $kern)/ }
   }
   $allchars{$id}{map} = $s1;
   $chars[$num] = $allchars{$id};
}

sub dummychar {
   #
   # Make a new dummy character
   #
   my ($num, $id) = @_;
   $allchars{$id}{wd} = 0;
   $allchars{$id}{ht} = 0;
   $allchars{$id}{dp} = 0;
   $allchars{$id}{ic} = 0;
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $allchars{$id}{map} = "      (SETCHAR H 0020)\n";
   $chars[$num] = $allchars{$id};
}

sub fixkerns {
   #
   # Generalise the kerning info contained in the vpl file by applying
   # it to new accented chars. Do not kern lower-case chars bearing
   # superscript accents with capitals, quotes or a preceding "f".
   #
   my ($char, $acc) = @_;
   my ($olabel, $nlabel, @liglist, $lchar, $rchar);
   return if $char eq "space";
   if ($acc =~ /^$accents$/) { $lchar = $rchar = $char }
   else {
      $lchar = $char;
      $rchar = $acc;
   }
   unless ($char =~ /^[a-z]/ and $acc =~ /^$supacc$/) {
      $ligs =~ s[(\n   \(LABEL )$rchar\)(?!\n   \(LIG.*$)]
		[$&$1$char$acc)]gm
                unless $ligs =~ /\n   \(LABEL $char$acc\)$/m;
      $ligs =~ s[(\n   \(LABEL )$rchar\)(\n   \(LIG.*$)+(?!\n   \(STOP\))]
		[$&$1$char$acc)]gm
                unless $ligs =~ /\n   \(LABEL $char$acc\)$/m;
      $ligs =~ s[(\n   \(KRN )$lchar( .*)$]
		[$&$1$char$acc$2]gm;
   }
   else {
      if ($ligs =~ /\n   \(LABEL $char\).*?\(STOP\)/s) {
	 $nlabel = $olabel = $&;
	 $nlabel =~ s/(\n   \(LABEL $char)\)/$1$acc)/
	   unless $ligs =~ /\n   \(LABEL $char$acc\)/m;
         $nlabel =~ s/\n   \(LIG .*\)$//gm;
         $nlabel =~ s/\n   \(LABEL (?!$char$acc).*\)$//gm;
	 $nlabel =~ s/\n   \(KRN ([A-Z]|quote).*\)$//gm;
	 $ligs =~ s/(\n   \(LABEL )$char\).*?\(STOP\)/$olabel$nlabel/s;
      }
      @liglist = split /\n   \(STOP\)/, $ligs;
      foreach (@liglist) {
	 unless (/\n   \(LABEL ([A-Zf]|quote).*\)$/m) {
	    s/(\n   \(KRN $char)( .*\))$/$&$1$acc$2/gm;
	 }
      }
      $ligs = join("\n   (STOP)", @liglist);
   }
}