# Jan Labanowski, jkl@osc.edu, Jan. 10, 1992
# File tex_koi8.rus
#
# This is a transliteration data file for converting from LaTeX to KOI8
# as used by RELCOM (GOST 19768-74). It assumes that the sister file:
# koi8-latex.rus was used to obtain the LaTeX file.
# Since LaTeX is a program, frequently complicated, it is probably possible
# to convert LaTeX to KOI8 only with TeX. However, if you have a simple
# LaTeX document, without math, tables, different font sizes and shapes,
# you can easily convert it to KOI8 text, by listing symbols for Russian
# letters on the left side and appropriate KOI8 codes on the right.
# Since there are many possible assignments in LaTeX, you need to
# modify this file to the ones actually used. First of all, some other
# symbols may be used to represent Russian letters. I included some more
# popular sequences, but there might be more, which I am not aware of.
# The TeX transliteration sequences follow AMS cyrillic convention for
# WNCYR fonts with cyracc.def file
# To be used with translit.c program by Jan Labanowski. For a format of
# this file consult translit documentation

1          file version number

" "        # string delimiters
[ ]        # list delimiters
{ }        # regular expression delimiters

#starting sequence
""

#ending sequence
""

4          # number of input SHIFT sequences

# The last two of the input "character sets" are used simply to delete all
# characters from LaTeX preamble and the \end{document} closing
# For set 2, note that \cyr may be followed by spaces or new lines
# and preceded with some LaTeX escape sequences
#SO-match #SO-subs #Nest-up #Nest-down #SI-match #SI-subs
"" "" "" "" "" ""                                                  #Latin letters
{{\\cyr[ \0x09-\0x0D]+} "" "{" "}" "}" ""                          #cyrillic
"\documentstyle" "" "" "" {\begin{document\0x7D[\0d09-\15]*} ""
"\end{document}" "" "" "" "" ""

0          # number of output SHIFT sequences, only one set of output characters

# conversion table
# inp_set inp_seq out_set out_seq
3 [\001-\255] 0 "\00"                       # delete LaTeX preamble
4 [\001-\255] 0 "\00"                       # delete end{document}
1 [A-Za-z] 0 [A-Za-z]                       #Latin letters A-Z and a-z

# If double backslash is followed by new line, skip the double backslash
0 {\\\\[\0x09-\0x0d]*} 0 "\0x0D\0x0A"       #restore new lines
0 "\\040" 0 " "                             # protected space

# Convert some special TeX characters
# these do not require going out of {\cyr ....}
0 "$[$" 0 "["
0 "$]$" 0 "]"
0 "$\wedge$" 0 "^"
0 "$\lbrace$" 0 "{"
0 "$\rbrace$" 0 "}"
0 "$\sim$" 0 "~"
0 "$\backslash$" 0 "\"
0 "$\mid$" 0 "|"
0 "$\star$" 0 "*"
0 "$<$" 0 "<"
0 "$>$" 0 ">"
0 "\$" 0 "$"
0 "\%" 0 "%"

# These were represented correctly only in Latin charset
1 "\_" 0 "_"
1 "\&" 0 "&"
1 "\#" 0 "#"
1 "\@" 0 "@"

# Cyrillic letters
# note that TS and ts sequences are checked before the \cydot is removed
2 {\\T[Ss][ \0x09-\0x0d]*} 0 "\0xE3"
2 {T[Ss]} 0 "\0xE3"                         # Tse
2 {\\t[Ss][ \0x09-\0x0d]*} 0 "\0xC3"
2 {t[Ss]} 0 "\0xC3"                         # tse
2 {\\S[Hh][Cc][Hh][ \0x09-\0x0d]*} 0 "\0xFD"
2 {S[Hh][Cc][Hh]} 0 "\0xFD"
2 {\\s[Hh][Cc][Hh][ \0x09-\0x0d]*} 0 "\0xDD"
2 {s[Hh][Cc][Hh]} 0 "\0xDD"
2 {\\Cdprime[ \0x09-\0x0d]*} 0 "\0xFF"      # Tverdyj znak
2 {\\T[Zz][ \0x09-\0x0d]*} 0 "\0xFF"
2 {\\Cprime[ \0x09-\0x0d]*} 0 "\0xF8"       # Myagkij znak
2 {\\M[Zz][ \0x09-\0x0d]*} 0 "\0xF8"
2 {\\cdprime[ \0x09-\0x0d]*} 0 "\0xDF"      # tverdyj znak
2 {\\tz[ \0x09-\0x0d]*} 0 "\0xDF"
2 {\\cprime[ \0x09-\0x0d]*} 0 "\0xD8"       # myagkij znak
2 {\\mz[ \0x09-\0x0d]*} 0 "\0xD8"
2 {\\u[ \0x09-\0x0d]*I} 0 "\0xEA"           # I kratkoje
2 "\u{I}" 0 "\0xEA"
2 {\\[Uu]I[ \0x09-\0x0d]*} 0 "\0xEA"
2 {\\u[ \0x09-\0x0d]*i} 0 "\0xCA"           # i kratkoje
2 {\\ui[ \0x09-\0x0d]*} 0 "\0xCA"
2 "\u{i}" 0 "\0xCA"
2 {\\`[ \0x09-\0x0d]*E} 0 "\0xFC"           # E oborotnoye
2 "\`{E}" 0 "\0xFC"
2 {\\`[ \0x09-\0x0d]*e} 0 "\0xDC"           # e oborotnoye
2 "\`{e}" 0 "\0xDC"
2 {\\K[Hh][ \0x09-\0x0d]*} 0 "\0xE8"
2 {K[Hh]} 0 "\0xE8"
2 {\\k[Hh][ \0x09-\0x0d]*} 0 "\0xC8"
2 {k[Hh]} 0 "\0xC8"
2 {\\T[Cc][Hh][ \0x09-\0x0d]*} 0 "\0xFE"
2 {\\C[Hh][ \0x09-\0x0d]*} 0 "\0xFE"
2 {C[Hh]} 0 "\0xFE"
2 {\\S[Hh][ \0x09-\0x0d]*} 0 "\0xFB"
2 {S[Hh]} 0 "\0xFB"
2 {\\c[Hh][ \0x09-\0x0d]*} 0 "\0xDE"
2 {\\t[Cc][Hh][ \0x09-\0x0d]*} 0 "\0xDE"
2 {c[Hh]} 0 "\0xDE"
2 {\\s[Hh][ \0x09-\0x0d]*} 0 "\0xDB"
2 {s[Hh]} 0 "\0xDB"
2 {\\Z[Hh][ \0x09-\0x0d]*} 0 "\0xF6"
2 {Z[Hh]} 0 "\0xF6"
2 {\\z[Hh][ \0x09-\0x0d]*} 0 "\0xD6"
2 {z[Hh]} 0 "\0xD6"
2 {\\Y[Uu][ \0x09-\0x0d]*} 0 "\0xE0"
2 {Y[Uu]} 0 "\0xE0"
2 {\\Y[Aa][ \0x09-\0x0d]*} 0 "\0xF1"
2 {Y[Aa]} 0 "\0xF1"
2 {\\y[Uu][ \0x09-\0x0d]*} 0 "\0xC0"
2 {y[Uu]} 0 "\0xC0"
2 {\\y[Aa][ \0x09-\0x0d]*} 0 "\0xD1"
2 {y[Aa]} 0 "\0xD1"
2 {\\"[ \0x09-\0x0D]*e} 0 "\0xA3"           # small \"e (yo)
2 "\\0o42{e}" 0 "\0xA3"
2 {\\y[Oo][ \0x09-\0x0D]*} 0 "\0xA3"
2 {\\"[ \0x09-\0x0D]*E} 0 "\0xB3"           # capital \"E (Yo)
2 "\\0o42{E}" 0 "\0xB3"
2 {\\Y[Oo][ \0x09-\0x0D]*} 0 "\0xB3"
2 {\\cydot[ \0x09-\0x0d]*} 0 ""             #\cydot out
2 "H" 0 "\0xE8"
2 "h" 0 "\0xC8"
2 "W" 0 "\0xFD"
2 "w" 0 "\0xDD"
2 "X" 0 "\0xFB"
2 "x" 0 "\0xDB"
2 "A" 0 "\0xE1"
2 "B" 0 "\0xE2"
2 "V" 0 "\0xF7"
2 "G" 0 "\0xE7"
2 "D" 0 "\0xE4"
2 "E" 0 "\0xE5"
2 "Z" 0 "\0xFA"
2 "I" 0 "\0xE9"
2 "K" 0 "\0xEB"
2 "L" 0 "\0xEC"
2 "M" 0 "\0xED"
2 "N" 0 "\0xEE"
2 "O" 0 "\0xEF"
2 "P" 0 "\0xF0"
2 "R" 0 "\0xF2"
2 "S" 0 "\0xF3"
2 "T" 0 "\0xF4"
2 "U" 0 "\0xF5"
2 "F" 0 "\0xE6"
2 "C" 0 "\0xE3"
2 "Y" 0 "\0xF9"
2 "a" 0 "\0xC1"
2 "b" 0 "\0xC2"
2 "v" 0 "\0xD7"
2 "g" 0 "\0xC7"
2 "d" 0 "\0xC4"
2 "e" 0 "\0xC5"
2 "z" 0 "\0xDA"
2 "i" 0 "\0xC9"
2 "k" 0 "\0xCB"
2 "l" 0 "\0xCC"
2 "m" 0 "\0xCD"
2 "n" 0 "\0xCE"
2 "o" 0 "\0xCF"
2 "p" 0 "\0xD0"
2 "r" 0 "\0xD2"
2 "s" 0 "\0xD3"
2 "t" 0 "\0xD4"
2 "u" 0 "\0xD5"
2 "f" 0 "\0xC6"
2 "c" 0 "\0xC3"
2 "y" 0 "\0xD9"

# Trash {}
0 "{" 0 ""