#! /usr/bin/env python3
# This file is part of the Gentium package for TeX.
# It is licensed under the Expat License, see doc//README for details.

# Used uni0162 for Tcedilla and uni021A for Tcommaaccent


# ERROR (non-critical)
# --------------------
# Found where the duplicate kerning pairs are written.
# Example of such duplicate kerning pair is 'A cacute.sc'
# It is generated in initialcap() for the first time and in
# accentedinitialcapsmallcap() for the second time.
#
# FIXED
#
# The previous error was fixed, although in a dirty way. Instead of
# writing directly to a file, the lines with extra kerning are added
# to a 'Counter' object from 'collections'. This automatically eliminates
# duplicates, but keeps track of them (they have a count of 2).
# 
# It was tested that just the duplicates are removed and nothing is lost.
# When the duplicates were removed from the file generated by the old
# script, the result is exactly the same as the file generated by this new
# version of the script.
#
# This file will be cleaned up later (or completely rewritten). I know
# this script is very dirty and it might be easier to rewrite it completely.


# TODO: use variable for the small caps extension '.sc'

# TODO: refactor the script; maybe remove classes


# TODO: Decide if should be used purely class-based kerning.

# TODO: It might be good idea to split this script into two scripts (Latin and
#       Cyrillic). It would be better to do different kerning for roman and
#       italic styles (at least for the small Cyrillic letters; especially
#       de, el, ge). It would be even better to do manual kerning, but it would take
#       much time. But I could do support for files with manual kerning. These
#       files would be used if they were present. This would be similar to the
#       files that are used for manual kerning for letters dcaron and lcaron.
#       There should probably be three such files for each font (Latin, Greek,
#       Cyrillic).

# The script currently combines class-based kerning and deriving kerning from list
# of kerns. Changing it to completely class-based kerning would create more kerning
# pairs than now. The current (mixed) solution makes it possible to make exceptions
# to class-based kerning.

# TODO: It might be a good idea to find out which kerning pairs can really occur for
#       e.g. encoding T1. Pure class-based kerning would give too many kerning pairs.
#       This is probably no problem for TeX, but it means too many kerning pairs
#       that would never be needed (and it would be more work to look at the changes
#       from the original fonts). The solution would be to look for languages that
#       are supported by T1 and take only those pairs where both letters are in
#       the same language. Even this would be a superset of what is really possible.
#       For example Czech has letters dcaron and ecaron, but the pair
#       dcaron + ecaron can never occur.
#
#       These pairs might be useful also for other fonts for TeX.


import os, sys, re

import collections

# Ratio "width of small caps / width of capital letters".  It was tested for
# the letters A, L, T, V, W and Y; all of them had nearly the same ratio.
# The kern values taken from capital letters are multiplied by this ratio
# when kerns for small caps are derived.
SMALL_CAPS_CORRECTION = 0.94

# Command-line arguments: the first one is the file the kerning pairs are
# read from (passed to KernReader), the second one is the file the extra
# kerning pairs are appended to (passed to KernWriter).
input_file_name = sys.argv[1]
output_file_name = sys.argv[2]

class KernReader:
    """Keeps the kerning-pair lines ('KPX ...') of one *.afm file in memory."""

    def __init__(self, filename):
        """Read `filename` and remember every line that starts with 'KPX '.

        The lines are stored unchanged (including the trailing newline) in
        the attribute `kern_lines`, in the order they appear in the file.
        """
        collected = []
        with open(filename, "r") as afm_file:
            for afm_line in afm_file:
                if afm_line.startswith('KPX '):
                    collected.append(afm_line)
        self.kern_lines = collected

    def getkerns(self, regex):
        """ return lines with kerning, format of *.afm

        `regex` is matched with re.match(), i.e. only at the beginning of
        each stored line.  All matching lines are returned as a new list
        (an empty list if nothing matches).

        TODO: Check that there is only one matching line.
        """
        matching = []
        for kern_line in self.kern_lines:
            if re.match(regex, kern_line) is not None:
                matching.append(kern_line)
        return matching


class KernWriter:
    # TODO: improve methods for quotes; probably merge them; also accented small caps; look
    #       also at other types of quotes, such as Polish and French; start with improving
    #       aacute.sc + quotedblleft and adieresis.sc + quotedblleft
    # TODO: several methods could be merged (simpler code, but worse result to report to Victor)
    kern_pairs_capital = [
        # BE CAREFUL: It is also used for small caps and may cause problems
        # when moving to class kerning
        ["V A", "V Aacute", "V Abreve", "V Acircumflex", "V Adieresis", "V AE", "V Agrave", "V Amacron", "V Aogonek", "V Aring", "V Atilde"],
        ["A Y", "A Yacute", "A Ydieresis", "Aacute Y", "Abreve Y", "Acircumflex Y", "Adieresis Y", "Agrave Y", "Amacron Y", "Aogonek Y", "Aring Y", "Atilde Y", "Aacute Yacute"],
        ["L V", "Lcommaaccent V", "Lslash V"],
        ["A V", "Aacute V", "Abreve V", "Acircumflex V", "Adieresis V", "Agrave V", "Amacron V", "Aogonek V", "Aring V", "Atilde V"],
        ["W A", "W Aacute", "W Abreve", "W Acircumflex", "W Adieresis", "W AE", "W Agrave", "W Amacron", "W Aogonek", "W Aring", "W Atilde"],
        ["Y A", "Y Aacute", "Y Abreve", "Y Acircumflex", "Y Adieresis", "Y AE", "Y Agrave", "Y Amacron", "Y Aogonek", "Y Aring", "Y Atilde", "Yacute A", "Ydieresis A", "Yacute Aacute"],
        ["F A", "F Aacute", "F Abreve", "F Acircumflex", "F Adieresis", "F AE", "F Agrave", "F Amacron", "F Aogonek", "F Aring", "F Atilde"],
        ["L Y", "L Yacute", "L Ydieresis", "Lcommaaccent Y", "Lslash Y"],
        ["L T", "L Tcaron", "L uni0162", "L uni021A", "Lcommaaccent T", "Lslash T"],
        ["A W", "Aacute W", "Abreve W", "Acircumflex W", "Adieresis W", "Agrave W", "Amacron W", "Aogonek W", "Aring W", "Atilde W"],
        ["P A", "P Aacute", "P Abreve", "P Acircumflex", "P Adieresis", "P AE", "P Agrave", "P Amacron", "P Aogonek", "P Aring", "P Atilde"],
        ["T A", "T Aacute", "T Abreve", "T Acircumflex", "T Adieresis", "T AE", "T Agrave", "T Amacron", "T Aogonek", "T Aring", "T Atilde", "Tcaron A", "uni0162 A", "uni021A A", "Tcaron Aacute"],
        ["A T", "A Tcaron", "A uni0162", "A uni021A", "Aacute T", "Abreve T", "Acircumflex T", "Adieresis T", "Agrave T", "Amacron T", "Aogonek T", "Aring T", "Atilde T", "Aacute Tcaron"],
        ["U A", "U Aacute", "U Abreve", "U Acircumflex", "U Adieresis", "U AE", "U Agrave", "U Amacron", "U Aogonek", "Uacute A", "Ucircumflex A", "Udieresis A", "Ugrave A", "Uhungarumlaut A", "Umacron A", "Uogonek A", "Uring A", "Uacute Aacute", "Uring Aacute"],
        ["A U", "A Uacute", "A Ucircumflex", "A Udieresis", "A Ugrave", "A Uhungarumlaut", "A Umacron", "A Uogonek", "A Uring", "Aacute U", "Abreve U", "Acircumflex U", "Adieresis U", "Agrave U", "Amacron U", "Aogonek U", "Aacute Uacute", "Aacute Uring"],
        ["R V", "Racute V", "Rcaron V", "Rcommaaccent V"],
        ["V G", "V Gbreve", "V Gcircumflex", "V Gcommaaccent"],
        ["V O", "V Oacute", "V Ocircumflex", "V Odieresis", "V OE", "V Ograve", "V Ohungarumlaut", "V Omacron", "V Oslash", "V Otilde"],
        ["Y O", "Y Oacute", "Y Ocircumflex", "Y Odieresis", "Y OE", "Y Ograve", "Y Ohungarumlaut", "Y Omacron", "Y Oslash", "Y Otilde", "Yacute O", "Ydieresis O", "Yacute Oacute"],
        ["C A", "C Aacute", "C Abreve", "C Acircumflex", "C Adieresis", "C AE", "C Agrave", "C Amacron", "C Aogonek", "C Aring", "C Atilde", "Cacute A", "Ccaron A", "Ccedilla A", "Ccircumflex A", "Ccaron Aacute"],
         # Dcaron can not be followed by Y with or without accent in a standard word, but I can
         # imagine some abbreviation or compound word with this combination
        ["D A", "D Aacute", "D Abreve", "D Acircumflex", "D Adieresis", "D AE", "D Agrave", "D Amacron", "D Aogonek", "D Aring", "D Atilde", "Dcaron A", "Dcroat A", "Dcaron Aacute"],
        ["D Y", "D Yacute", "D Ydieresis", "Dcaron Y", "Dcroat Y", "Dcaron Yacute"],
        ["K O", "K Oacute", "K Ocircumflex", "K Odieresis", "K OE", "K Ograve", "K Ohungarumlaut", "K Omacron", "K Oslash", "K Otilde", "Kcommaaccent O"],
        ["L W", "Lcommaaccent W", "Lslash W"],
        # there would be many kerns for O A when using kern classes; the same for A O
        ["O A", "O Aacute", "O Abreve", "O Acircumflex", "O Adieresis", "O AE", "O Agrave", "O Amacron", "O Aogonek", "O Aring", "O Atilde", "Oacute A", "Ocircumflex A", "Odieresis A", "Ograve A", "Ohungarumlaut A", "Omacron A", "Oslash A", "Otilde A", "Oacute Aacute"],
        ["O V", "Oacute V", "Ocircumflex V", "Odieresis V", "Ohungarumlaut V", "Omacron V", "Oslash V", "Otilde V"],
        ["O Y", "O Yacute", "O Ydieresis", "Oacute Y", "Ocircumflex Y", "Odieresis Y", "Ograve Y", "Ohungarumlaut Y", "Omacron Y", "Oslash Y", "Otilde Y", "Oacute Yacute"],
        ["R W", "Racute W", "Rcaron W", "Rcommaaccent W"],
        ["O X", "Oacute X", "Ocircumflex X", "Odieresis X", "Ograve X", "Ohungarumlaut X", "Omacron X", "Oslash X", "Otilde X"],
        ["R T", "R Tcaron", "R uni0162", "R uni021A", "Racute T", "Rcaron T", "Rcommaaccent T", "Rcaron Tcaron"],
        ["W O", "W Oacute", "W Ocircumflex", "W Odieresis", "W OE", "W Ograve", "W Ohungarumlaut", "W Omacron", "W Oslash", "W Otilde"],
        ["Y S", "Y Scaron", "Yacute S", "Ydieresis S", "Yacute Scaron"],
        ["A C", "A Cacute", "A Ccaron", "A Ccedilla", "A Ccircumflex", "Aacute C", "Abreve C", "Acircumflex C", "Adieresis C", "Agrave C", "Amacron C", "Aogonek C", "Aring C", "Atilde C", "Aacute Ccaron"],
        ["A G", "A Gbreve", "A Gcircumflex", "A Gcommaaccent", "Aacute G", "Abreve G", "Acircumflex G", "Adieresis G", "Agrave G", "Amacron G", "Aogonek G", "Aring G", "Atilde G"],
        ["A O", "A Oacute", "A Ocircumflex", "A Odieresis", "A OE", "A Ograve", "A Ohungarumlaut", "A Omacron", "A Oslash", "A Otilde", "Aacute O", "Abreve O", "Acircumflex O", "Adieresis O", "Agrave O", "Amacron O", "Aogonek O", "Aring O", "Atilde O", "Aacute Oacute"],
        ["A Q", "Aacute Q", "Abreve Q", "Acircumflex Q", "Adieresis Q", "Agrave Q", "Amacron Q", "Aogonek Q", "Aring Q", "Atilde Q"],
        ["D W", "Dcaron W", "Dcroat W"],
        # problematic kern follows (the kern value of DV was positive in GentiumPlus Regular)
        ["D V", "Dcaron V", "Dcroat V"]
    ]

    kern_pairs_initial_cap = [
        ["Y a", "Y aacute", "Y abreve", "Y acircumflex", "Y adieresis", "Y ae", "Y agrave", "Y amacron", "Y aogonek", "Y aring", "Y atilde", "Yacute a", "Ydieresis a", "Yacute aacute"],
        ["Y e", "Y eacute", "Y ecaron", "Y ecircumflex", "Y edieresis", "Y edotaccent", "Y egrave", "Y emacron", "Y eogonek", "Yacute e", "Ydieresis e", "Yacute eacute", "Yacute ecaron"],
        ["Y o", "Y oacute", "Y ocircumflex", "Y odieresis", "Y oe", "Y ograve", "Y ohungarumlaut", "Y omacron", "Y oslash", "Y otilde", "Yacute o", "Ydieresis o", "Yacute oacute"],
        # not used the combination "Y u" that is only in GentiumPlus regular
        ["A y", "A yacute", "A ydieresis", "Aacute y", "Abreve y", "Acircumflex y", "Adieresis y", "Agrave y", "Amacron y", "Aogonek y", "Aring y", "Atilde y", "Aacute yacute"],
        ["T a", "T aacute", "T abreve", "T acircumflex", "T adieresis", "T ae", "T agrave", "T amacron", "T aogonek", "T aring", "T atilde", "Tcaron a", "uni0162 a", "uni021A a", "Tcaron aacute"],
        ["T e", "T eacute", "T ecaron", "T ecircumflex", "T edieresis", "T edotaccent", "T egrave", "T emacron", "T eogonek", "Tcaron e", "uni0162 e", "uni021A e"],
        ["T o", "T oacute", "T ocircumflex", "T odieresis", "T oe", "T ograve", "T ohungarumlaut", "T omacron", "T oslash", "T otilde", "Tcaron o", "uni0162 o", "uni021A o", "Tcaron oacute"],
    ]

    kern_pairs_small = [
        ["k e", "k eacute", "k ecaron", "k ecircumflex", "k edieresis", "k edotaccent", "k egrave", "k emacron", "k eogonek", "kcommaaccent e"],
        ["r a", "r aacute", "r abreve", "r acircumflex", "r adieresis", "r ae", "r agrave", "r amacron", "r aogonek", "r aring", "r atilde", "racute a", "rcaron a", "rcommaaccent a", "rcaron aacute"],
        ["r e", "r eacute", "r ecaron", "r edieresis", "r edotaccent", "r egrave", "r emacron", "r eogonek", "racute e", "rcaron e", "rcommaaccent e", "rcaron eacute"],
        ["r c", "r cacute", "r ccaron", "r ccedilla", "r cdotaccent", "racute c", "rcaron c", "rcommaaccent c", "rcaron ccaron"],
        ["y e", "y eacute", "y ecaron", "y ecircumflex", "y edieresis", "y edotaccent", "y egrave", "y emacron", "y eogonek", "yacute e", "ydieresis e", "yacute eacute"],
        ["y o", "y oacute", "y ocircumflex", "y odieresis", "y oe", "y ograve", "y ohungarumlaut", "y omacron", "y oslash", "y otilde", "yacute o", "ydieresis o", "yacute oacute"],
        ["r o", "r oacute", "r ocircumflex", "r odieresis", "r oe", "r ograve", "r ohungarumlaut", "r omacron", "r oslash", "r otilde", "racute o", "rcaron o", "rcommaaccent o", "rcaron oacute"],
        ["y a", "y aacute", "y abreve", "y acircumflex", "y adieresis", "y ae", "y agrave", "y amacron", "y aogonek", "y aring", "y atilde", "yacute a", "ydieresis a", "yacute aacute"],
        ["r d", "r dcaron", "r dcroat", "racute d", "rcaron d", "rcommaaccent d", "rcaron dcaron"]
    ]

    kern_pairs_quotes = [
        # TODO pairs that have collisions can be removed in "make_derived_latin_kerns" create list for it!
        # this is far from complete
        # TODO: use this list also for accented small caps (using SMALL_CAPS_CORRECTION)
        ["A quotedblright", "A quotedblleft", "Aacute quotedblleft", "Adieresis quotedblleft"],
        ["L quotedblright", "L quotedblleft"], # Lcaron and Lacute left out (collision)
        ["A quoteright", "A quoteleft", "Aacute quoteleft", "Adieresis quoteleft"],
        ["L quoteright", "L quoteleft"], # Lcaron and Lacute left out (collision)
        # the following groups may look like nonsense, but all this actually tries to follow the
        # idea of correcting just the most critical letters and giving them the same kern value
        ["A quotedblright", "quotedblbase V", "quotedblbase W", "quotedblbase Y", "quotedblbase Yacute", "quotedblbase T", "quotedblbase Tcaron"],
        ["A quoteright", "quotesinglbase V", "quotesinglbase W", "quotesinglbase Y", "quotesinglbase Yacute", "quotesinglbase T", "quotesinglbase Tcaron"]
    ]

    # TODO: It is possible to create several kerning classes for one letter. For example 'A' can be used
    #       for letter A with all possible accents and 'A_unaccented' can be used for just the letter 'A',
    #       'A_subset' can be used for just some accented variants of 'A'. The base kern is now removed,
    #       therefore there shouldn't be problems with non-existing letters like 'A_unaccented' (but check
    #       that it is true).
    #
    #       This would enable to move more kerns to kerning classes.
    #
    #       Deriving all kerns from e.g. 'a e' would give too many kerns. It might help to check
    #       which combinations can really exist in some language, but it would require some research
    #       and a lot of work.
    latin_class_kerns = [
        # sort lines before commit!
        "A v",
        "A w",
        "F a",
        "F e",
        "F o",
        "V a",
        "V e",
        "V o",
        "W a",
        "W e",
        "W o",
        "a v",
        "a w",
        "c h",
        "c k",
        "e x",
        "o v",
        "o w",
        "o x",
        "r comma",
        "r period",
        "r q",
        "v a",
        "v e",
        "v o",
        "x e",
        "y comma",
        "y period",
        # nothing to be derived from "f quoteright" and "f quotedblright"
    ]

    # similar Latin letters that come first in the kerning pair
    latin_left_classes = {
        # sort lines before commit!
        "A": ["A", "Aacute", "Abreve", "Acircumflex", "Adieresis", "Agrave", "Amacron", "Aogonek", "Aring", "Atilde"],
        "F": ["F"],
        "V": ["V"],
        "W": ["W"],
        "a": ["a", "aacute", "abreve", "acircumflex", "adieresis", "agrave", "amacron", "aogonek", "aring", "atilde"],
        "c": ["c", "cacute", "ccaron", "ccedilla", "cdotaccent"],
        "e": ["e", "eacute", "ecaron", "ecircumflex", "edieresis", "edotaccent", "egrave", "emacron", "eogonek"],
        "o": ["o", "oacute", "ocircumflex", "odieresis", "ograve", "oslash", "otilde"],
        "r": ["r", "racute", "rcaron", "rcommaaccent"],
        "v": ["v"],
        "x": ["x"],
        "y": ["y", "yacute", "ydieresis"],
    }

    # similar Latin letters that come second in the kerning pair
    latin_right_classes = {
        # sort lines before commit!
        "a": ["a", "aacute", "abreve", "acircumflex", "adieresis", "ae", "agrave", "amacron", "aogonek", "aring", "atilde"],
        "comma": ["comma"],
        "e": ["e", "eacute", "ecaron", "ecircumflex", "edieresis", "edotaccent", "egrave", "emacron", "eogonek"],
        "h": ["h"],
        "k": ["k"],
        "o": ["o", "oacute", "ocircumflex", "odieresis", "oe", "ograve", "ohungarumlaut", "omacron", "oslash", "otilde"],
        "period": ["period"],
        "q": ["q"],
        "v": ["v"],
        "w": ["w"],
        "x": ["x"],
    }


    # first pair has Latin letters, the second pair has Cyrillic letters; probably not very obvious
    # there is no kern for 'X O' in the fonts
    kern_pairs_cyrillic = [
        # sort lines before commit!
        ["A C", "А С"],
        ["A O", "А О"],
        ["A T", "А Т"],
        ["A V", "А Ч"],  # make it better
        ["A Y", "А Ү"],
        ["A y", "А у"],
        ["C A", "С А"],
        ["O A", "О А"],
        ["O X", "О Х"],
        ["O Y", "О Ү"],
        ["P A", "Р А"],
        ["T A", "Г А"],
        ["T A", "Г Д"],  # make it better
        ["T A", "Г Л"],  # make it better
        ["T A", "Т А"],
        ["T A", "Т Д"],  # make it better
        ["T A", "Т Л"],  # make it better
        ["T a", "Г а"],
        ["T a", "Т а"],
        ["T e", "Г е"],
        ["T e", "Т е"],
        ["T o", "Г о"],
        ["T o", "Т о"],
        ["Y A", "У А"],  # make it better
        ["Y A", "Ү А"],
        ["Y O", "Ү О"],
        ["Y a", "Ү а"],
        ["Y e", "Ү е"],
        ["Y o", "У Д"],  # make it better
        ["Y o", "У Л"],  # make it better
        ["Y o", "Ү о"],
        ["e x", "е х"],
        ["o x", "о х"],
        ["x e", "х е"],
        ["x o", "х о"],
        ["y o", "у д"],  # make it better
        ["y o", "у л"],  # make it better
    ]

    # similar Cyrillic letters that come first in the kerning pair
    cyrillic_left_classes = {
            # sort lines before commit!
            "А": ["А"],
            "Г": ["Г", "Ӷ"],
            "О": ["О"],
            "Р": ["Р", "Ҏ"],
            "С": ["С", "Ҫ"],
            "Т": ["Т", "Ҭ"],
            "У" : ["У"],
            "е": ["е", "ё"],
            "о": ["о"],
            "у" : ["у"],
            "х": ["х", "ҳ", "ӽ"],
            "Ү": ["Ү"],  # not Russian
    }

    # similar Cyrillic letters that come second in the kerning pair
    cyrillic_right_classes = {
            # sort lines before commit!
            "А": ["А"],
            "Д" : ["Д"],
            "Л" : ["Л", "Љ", "Ӆ", "Ԓ"],
            "О": ["О"],
            "С": ["С", "Ҫ"],
            "Т": ["Т", "Ҭ"],
            "Х": ["Х", "Ҳ", "Ӽ"],
            "Ч" : ["Ч", "Ҷ", "Ҹ", "Ӌ"],
            "а": ["а", "ӕ"],
            "д" : ["д"],
            "е": ["е", "ё"],
            "л" : ["л", "љ", "ӆ", "ԓ"],
            "о": ["о"],
            "у": ["у", "ў"],
            "х": ["х", "ҳ", "ӽ"],
            "Ү": ["Ү"],  # not Russian
    }

    def __init__(self, filename):
        """Create a writer that has not collected any kerning pairs yet.

        filename -- the name of the file with extra kerning pairs
        """
        # The keys of the Counter are complete lines with extra kerning
        # pairs; the count tells how many times the same line was added.
        self.kern_lines = collections.Counter()
        self.filename = filename

    def add_to_kern_lines(self, s):
        """Add one extra kerning pair (the complete line `s`) to kern_lines.

        Two glyph names are corrected before the line is stored, because the
        small cap names are built elsewhere by lower-casing the capital name
        and appending '.sc':

        * Tcommaaccent is uni021A, but its small cap is uni021B.sc, not
          'uni021a.sc' (lower() also changes the hex digit 'A' to 'a').
        * Tcedilla is uni0162, but its small cap is uni0163.sc, not
          'uni0162.sc'.

        Adding the same line again only increases its count.
        """
        corrections = (
            ("uni021a.sc", "uni021B.sc"),
            ("uni0162.sc", "uni0163.sc"),
        )
        extra_line = s
        for wrong_name, right_name in corrections:
            extra_line = extra_line.replace(wrong_name, right_name)
        self.kern_lines[extra_line] += 1

    def write_kern_lines_to_file(self):
        """Append the collected kerning lines to the file `self.filename`.

        The file is opened in append mode, so its existing content is kept.
        Every distinct line is written exactly once, no matter how many
        times it was added.
        """
        with open(self.filename, "a") as extra_kern_file:  # append to file!
            extra_kern_file.writelines(self.kern_lines.keys())


    def small_caps(self, fromreader):
        """Derive small cap kerns from the kerns of two capital letters.

        Every line returned by fromreader.getkerns("KPX [A-Z] [A-Z]") gives
        two new lines: small cap + small cap and capital letter + small cap.
        The name of a small cap is the lower-cased glyph name followed by
        '.sc'.  The kern value is multiplied by SMALL_CAPS_CORRECTION and
        rounded to an integer.
        """
        for kern_line in fromreader.getkerns("KPX [A-Z] [A-Z]"):
            fields = kern_line.split()
            first, second = fields[1], fields[2]
            second_sc = second.lower() + ".sc"
            scaled = str(int(round(SMALL_CAPS_CORRECTION * float(fields[3]))))
            # two small caps
            self.add_to_kern_lines(
                " ".join(["KPX", first.lower() + ".sc", second_sc, scaled]) + "\n")
            # capital letter + small cap
            self.add_to_kern_lines(
                " ".join(["KPX", first, second_sc, scaled]) + "\n")

    def right_punctuation(self, fromreader):
        """Derive kerns for small cap + punctuation.

        The source lines are the kerns of a capital letter followed by
        period, comma, quotedblright or quoteright (asked for in this
        order).  In each of them the first glyph is replaced by its small
        cap (lower-cased name + '.sc') and the kern value is multiplied by
        SMALL_CAPS_CORRECTION and rounded to an integer.
        """
        patterns = (
            "KPX [A-Z] period",
            "KPX [A-Z] comma",
            "KPX [A-Z] quotedblright",
            "KPX [A-Z] quoteright",
        )
        source_lines = []
        for pattern in patterns:
            source_lines.extend(fromreader.getkerns(pattern))

        for kern_line in source_lines:
            fields = kern_line.split()
            small_cap = fields[1].lower() + ".sc"
            scaled = str(int(round(SMALL_CAPS_CORRECTION * float(fields[3]))))
            self.add_to_kern_lines(
                " ".join(["KPX", small_cap, fields[2], scaled]) + "\n")

    def left_punctuation(self, fromreader):
        """Derive kerns for punctuation + small cap.

        The source lines are the kerns of quotedblleft or quoteleft
        followed by a capital letter (asked for in this order).  In each of
        them the second glyph is replaced by its small cap (lower-cased
        name + '.sc') and the kern value is multiplied by
        SMALL_CAPS_CORRECTION and rounded to an integer.
        """
        source_lines = []
        for pattern in ("KPX quotedblleft [A-Z]", "KPX quoteleft [A-Z]"):
            source_lines.extend(fromreader.getkerns(pattern))

        for kern_line in source_lines:
            fields = kern_line.split()
            quote = fields[1]
            small_cap = fields[2].lower() + ".sc"
            scaled = str(int(round(SMALL_CAPS_CORRECTION * float(fields[3]))))
            self.add_to_kern_lines(
                " ".join(["KPX", quote, small_cap, scaled]) + "\n")

    def accented_capital_letters(self, fromreader):
        """Derive kerns for accented capital letters and their small caps.

        Each group in kern_pairs_capital starts with a base pair; the first
        line that `fromreader` returns for it is the template.  For every
        other pair of the group three lines are added: the pair itself with
        the kern value of the base pair, small cap + small cap, and capital
        letter + small cap.  The two small cap lines have the kern value
        multiplied by SMALL_CAPS_CORRECTION and rounded to an integer.

        Raises IndexError if no line is found for a base pair.
        """
        for group in self.kern_pairs_capital:
            base_pair = group[0]
            variants = group[1:]
            base_line = fromreader.getkerns("KPX " + base_pair + " ")[0]
            for variant in variants:
                derived_line = base_line.replace(base_pair, variant)
                fields = derived_line.split()
                # two capital letters
                self.add_to_kern_lines(derived_line)

                first, second = fields[1], fields[2]
                second_sc = second.lower() + ".sc"
                scaled = str(int(round(SMALL_CAPS_CORRECTION * float(fields[3]))))
                # two small caps
                self.add_to_kern_lines(
                    " ".join(["KPX", first.lower() + ".sc", second_sc, scaled]) + "\n")
                # capital letter + small cap
                self.add_to_kern_lines(
                    " ".join(["KPX", first, second_sc, scaled]) + "\n")

    def accented_small_letters(self, fromreader):
        """Derive kerns for accented small letters.

        Each group in kern_pairs_small starts with a base pair.  The first
        line that `fromreader` returns for the base pair is copied for every
        other pair of the group, with the base pair replaced by that pair.

        Raises IndexError if no line is found for a base pair.
        """
        for group in self.kern_pairs_small:
            base_pair = group[0]
            variants = group[1:]
            base_line = fromreader.getkerns("KPX " + base_pair + " ")[0]
            for variant in variants:
                self.add_to_kern_lines(base_line.replace(base_pair, variant))

    def accented_initial_cap(self, fromreader):
        """Derive kerns for a capital letter followed by a small letter.

        Works on the groups in kern_pairs_initial_cap: the kern line of the
        first (base) pair of a group is reused for all the remaining pairs
        of that group; only the glyph names are exchanged.

        Raises IndexError if `fromreader` has no line for a base pair.
        """
        for group in self.kern_pairs_initial_cap:
            base_pair, variants = group[0], group[1:]
            found = fromreader.getkerns("KPX " + base_pair + " ")
            template = found[0]
            derived_lines = [template.replace(base_pair, variant)
                             for variant in variants]
            for derived_line in derived_lines:
                self.add_to_kern_lines(derived_line)

    def make_derived_latin_kerns(self, latin_kern):
        """Return the pairs derived from the base pair `latin_kern`.

        `latin_kern` consists of two base letters separated by whitespace.
        Every member of the left class of the first letter is combined with
        every member of the right class of the second letter.  The first
        combination is dropped; with the base letter listed first in both
        classes it is the base kern that is already in the font.
        """
        left_base_letter, right_base_letter = latin_kern.split()

        derived_kerns = []
        for left in self.latin_left_classes[left_base_letter]:
            for right in self.latin_right_classes[right_base_letter]:
                derived_kerns.append(" ".join((left, right)))

        # remove the base kern that is already in the font
        return derived_kerns[1:]

    def make_latin_class_kerns(self, fromreader):
        """Derive kerns from the Latin kerning classes.

        For every base pair in latin_class_kerns the derived pairs are made
        by make_derived_latin_kerns().  The first line that `fromreader`
        returns for the base pair is then copied for each derived pair, with
        the base pair replaced by the derived one.

        Raises IndexError if `fromreader` has no line for a base pair.
        """
        for base_pair in self.latin_class_kerns:
            derived_pairs = self.make_derived_latin_kerns(base_pair)
            found = fromreader.getkerns("KPX " + base_pair + " ")
            template = found[0]

            for derived_pair in derived_pairs:
                self.add_to_kern_lines(template.replace(base_pair, derived_pair))

    def accented_left_punctuation(self, fromreader):
        """Do nothing; `fromreader` is not used (nothing to be done)."""
        return None

    def czech_quotes(self, fromreader):
        """Derive kerns between letters and quotes.

        Each group in kern_pairs_quotes starts with a base pair; all the
        other pairs of the group get the kern line of the base pair (the
        first line returned by `fromreader`) with the glyph names replaced.

        Raises IndexError if `fromreader` has no line for a base pair.
        """
        for group in self.kern_pairs_quotes:
            base_pair = group[0]
            variants = group[1:]
            matches = fromreader.getkerns("KPX " + base_pair + " ")
            template = matches[0]
            for line in (template.replace(base_pair, variant) for variant in variants):
                self.add_to_kern_lines(line)

    def char_to_uni(self, c):
        """ Convert character to 'uniXXXX' string

        `c` is a single character.  For a character of the Basic
        Multilingual Plane the result is 'uni' followed by exactly four
        upper-case hexadecimal digits of the code point (e.g. 'uni0410').

        For a code point above U+FFFF the result is 'u' followed by the five
        or six hexadecimal digits (e.g. 'u1D400'), because the 'uni' prefix
        is defined only for groups of four digits and 'uni1D400' would not
        be a valid glyph name.
        """
        code_point = ord(c)
        if code_point > 0xFFFF:
            return "u{0:04X}".format(code_point)
        return "uni{0:04X}".format(code_point)

    # TODO: Test the kern_pairs_cyrillic array. First pair must be Latin letters,
    #       all the remaining pairs must be Cyrillic letters.

    def make_derived_cyrillic_kerns(self, cyrillic_kern):
        """ make kerns from the base cyrillic kern and kern classes

        The first character of `cyrillic_kern` selects the left class and
        the last character selects the right class.  Every member of the
        left class is combined with every member of the right class; the
        result is a list of 'left right' strings.  Unlike the Latin variant,
        no combination is removed from the list.
        """
        first_letter = cyrillic_kern[0]
        last_letter = cyrillic_kern[-1]

        pairs = []
        for left in self.cyrillic_left_classes[first_letter]:
            for right in self.cyrillic_right_classes[last_letter]:
                pairs.append(" ".join((left, right)))
        return pairs

    def cyrillic_kerns(self, fromreader):
        """Derive kerns for Cyrillic letters from kerns of Latin letters.

        Each entry of kern_pairs_cyrillic holds a Latin pair and a Cyrillic
        pair.  The Cyrillic pair is expanded by
        make_derived_cyrillic_kerns(), and every resulting pair gets the
        first kern line that `fromreader` returns for the Latin pair, with
        the glyph names replaced by the 'uniXXXX' names of the Cyrillic
        letters.  The entry is skipped when no line is found for the Latin
        pair.
        """
        for entry in self.kern_pairs_cyrillic:
            latin_pair = entry[0]
            cyrillic_pairs = self.make_derived_cyrillic_kerns(entry[1])
            found = fromreader.getkerns("KPX " + latin_pair + " ")

            # only continue when the corresponding Latin kern exists
            if found:
                latin_line = found[0]
                for cyrillic_pair in cyrillic_pairs:
                    left_char, right_char = cyrillic_pair.split()
                    uni_pair = " ".join((self.char_to_uni(left_char),
                                         self.char_to_uni(right_char)))
                    self.add_to_kern_lines(latin_line.replace(latin_pair, uni_pair))

# Read the kerning pairs of the input file.
fin = KernReader(input_file_name)

# output_generator is not a file any more! Rename!
output_generator = KernWriter(output_file_name)

# All the generating steps take the reader as the only argument and are run
# in the order given here.
_generating_steps = (
    # letters without accents
    output_generator.small_caps,
    output_generator.right_punctuation,
    output_generator.left_punctuation,
    # letters with accents
    output_generator.accented_capital_letters,
    output_generator.accented_small_letters,
    output_generator.accented_initial_cap,
    output_generator.make_latin_class_kerns,
    output_generator.accented_left_punctuation,
    output_generator.czech_quotes,
    output_generator.cyrillic_kerns,
)
for _generate in _generating_steps:
    _generate(fin)

# Append the collected lines to the output file.
output_generator.write_kern_lines_to_file()