#!/usr/bin/perl
# Kalvis M. Jansons
# This script is called ``txt2tex'', and converts well-formatted plain
# text into LaTeX.
# On a UNIX system replace the top line with something like: #!/usr/bin/perl
# but with your system's path to perl!
# Copyright (C) 1998 --- 2008 Kalvis M. Jansons
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
#: # Txt2tex auto find perl code.
# eval 'exec perl -S $0 "$@"'
# if 0;
# Version control lines
$version = '4.0';
#h
#h TXT2TeX Copyright (C) 1998 --- 2008 Kalvis M. Jansons
#h =====================================================
#h
#h
#h This program is free software: you can redistribute it and/or modify
#h it under the terms of the GNU General Public License as published by
#h the Free Software Foundation, either version 3 of the License, or
#h (at your option) any later version.
#h
#h This program is distributed in the hope that it will be useful,
#h but WITHOUT ANY WARRANTY; without even the implied warranty of
#h MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#h GNU General Public License for more details.
#h
#h You should have received a copy of the GNU General Public License
#h along with this program. If not, see <http://www.gnu.org/licenses/>.
#h
#h This perl script (which is part of the KalTeX package) converts plain text
#h into something with a little LaTeX formatting. If you are reading a LaTeXed
#h version of this ``readme'' file, it was made from the comments in the code
#h of txt2tex using txt2tex to format them; if you are reading the plain text
#h version, try running it through txt2tex (you can use ``txt2tex --demo'' for
#h this on a unix system).
#h
#h Written by Kalvis M. Jansons (email address k@kalvis.com), but based on
#h txt2html by Seth Golub (email address seth@aigeek.com). So if you like it,
#h send an email to both of us, but thank Seth the most; if you have any
#h problems or suggestions send an email to me (Kalvis).
#h
#h By default, much of LaTeX's fine structure is disabled by definitions in the
#h .tex file header. If you need to edit the LaTeX you may need to remove
#h or change some of these statements; or you may need to rerun txt2tex in a
#h lower escaping mode, to add more complex structures, like tables and
#h complex equations. I did it this way as I will use txt2tex myself mainly
#h for non-mathematical documents, and for those, I like to be able to type %
#h for percent etc., and paste in emails without worrying too much about all
#h the strange symbols. Set the ``-ec'' flag if you want to ``escape'' all
#h of LaTeX's special functions, and kill the ``\'', which is often the
#h safest setting for ``unknown'' document formats.
#h
#h
#h DO YOU WANT A DEMONSTRATION? IF SO, SEE BELOW.
#h
#h * For a trivial demo of txt2tex, type ``txt2tex --info |txt2tex -ec''.
#h o For a nicer copy of this readme file, try
#h ``txt2tex --info |txt2tex -ec -ns -10pt''.
#h o Or maybe you will like the look of this better:
#h ``txt2tex --info |txt2tex -tf -ec -ns -10pt''.
#h - Remember, to see the nice output, type something like:
#h ``txt2tex --info |txt2tex -tf -ec -r off > readme.tex''
#h followed by ``latex readme.tex; xdvi readme.dvi''.
#h o On a unix or linux system try ``txt2tex --demo''.
#h * The best test is clearly to try it on one of your own plain text files.
#h
########################
# Some initializations
#
# $mac = 1 if $^O =~ /MacOS/; # Are we running under MacOS
#
# if ($mac)
# {
# my($cmdLine, @args);
# $cmdLine = &MacPerl::Ask("Enter command line options:");
# require "shellwords.pl";
# @args = &shellwords($cmdLine);
# unshift(@ARGV, @args);
# open(OUTPUT, ">t2t_output.tex");
# select OUTPUT;
# }
# List of ruleset dictionaries; it starts out holding the single value 0,
# and ``--ruleset'' pushes file names onto the end of it.
@ruleset_dictionaries = (0);
$num_heading_styles = 0;
# Element 0 is not a real tag: "orig" only marks these as the default
# headings, so that it is possible to check whether they have been changed.
my @heading_tag = qw(orig section subsection subsubsection paragraph
                     subparagraph);
#
#########################
#########################
# Configurable options
#
#h
#h Paper size
#h ~~~~~~~~~~
#h
#h The paper size is set to ``a4paper'', but if you would like a different
#h paper size I suggest finding the line with ``a4paper'' in txt2tex and
#h changing it once and for all. This can also be changed using the
#h ``--doctype'' option.
#h
#h Tag syntax
#h ~~~~~~~~~~
#h
#h In the options in the next section, the term ``tag'' is often used. I
#h have used this term for many types of LaTeX mark-up instruction. The
#h syntax for using tags with txt2tex is easy. For a simple tag, which
#h puts a heading into a LaTeX subsection form, the tag is just ``subsection''.
#h For more complex, or nested, tags the syntax is a little more complex. If,
#h for example, you wanted all section headings to be centered, the tag to do
#h it with would be ``section{\center''. You could also add a ``clearpage''
#h so each section is on a new page, and a ``*'' so the sections are not
#h numbered; the tag would then be ``clearpage\section*{\center''. Also
#h remember when using tags on a command line, you must take account of the
#h normal shell escaping conventions.
#h
#h Some important command line options
#h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#h
#h Note that any command line option name can contain any number of ``_'' to
#h make the command line more readable, and, in fact, you only need a single
#h ``-'' for any of the names listed with ``--''.
#h
#h [(-dt|--doctype) <doctype>]
$doctype = "\\documentclass[a4paper,12pt]{article}";
# Do not use ``null'' here, but rather "".
#h
#h Used to set the LaTeX documentclass or documentstyle. It can be set to
#h ``null'' for no doctype, which is useful if you want to add some LaTeX
#h definitions above the definitions in the txt2tex header. For an example,
#h see the definition of ``--switch slides'' at the end of txt2tex.
#h
#h [-10pt|-11pt|-12pt]
#h
#h Used to set the LaTeX font size. The default is 12pt. The ``pt'' can
#h be dropped.
#h
#h [(-up|--usepackage) |off]
$usepackages = "";
#h
#h Sets a LaTeX ``usepackage'' definition. No default packages loaded.
#h
#h [(-lh|--latexhooks) ]
$latexhook = "\\jobname";
$latexhookmode = 0;
#h
#h Used to add LaTeX instructions from files. Given a ``name'', it tells LaTeX
#h to read (if they exist) the files name-HeadB, name-HeadE, name-BodyB,
#h name-BodyE (with or without a suffix .tex); these files are read in to the
#h beginning and end of the HEAD and the beginning and end of the BODY.
#h Given a number, it sets the ``latex-hook'' mode, which controls which LaTeX
#h input statements are added; these are 1,2,4,8 for the above files, which
#h are bitwise ORed. If a new LaTeX-hook name is given, the mode is set to 15,
#h i.e. all bits set. If a mode is given, and no name has been set, the
#h default name ``\jobname'' is used as the name. Hooks are off by default.
#h
#h Remember in LaTeX the basename of the LaTeX file is stored in the LaTeX
#h variable ``\jobname'', so by using this as the base part of your LaTeX
#h hooks, you would not have to change the LaTeX itself if you wanted to
#h use a different set of hook files, as you would need only to change the
#h name of the main LaTeX file.
#h
#h [(-ec|--escapechars) [<mode>]]
#h
# Bit flags for the escape mode; they can be bitwise ORed together.
($ESCBSLASH, $ESCDOLLARS, $ESCSCRIPTS, $ESCANGLES,
 $ESCAND,    $ESCVERT,    $ESCNUM,     $ESCTILDA) = map { 1 << $_ } 0 .. 7;
# Note that 256 is not assigned to any escape.
($ESCPERCENT, $ESCDQUOTE) = (1 << 9, 1 << 10);
$ESCALL     = 2047;                   # All bits from 1 up to 1024.
$ESCDEFAULT = $ESCALL & ~$ESCBSLASH;  # 2046: everything but the backslash.
$escapemode = $ESCDEFAULT;
#h
#h Used to set the escape mode. The options (which can be bitwise ORed) are:
#h
#h 1 --- escape \
#h 2 --- escape $
#h 4 --- escape ^ and _
#h 8 --- escape < and >
#h 16 --- escape &
#h 32 --- escape |
#h 64 --- escape #
#h 128 --- escape ~
#h 512 --- escape %
#h 1024 --- escape "
#h
#h (The above list shows what txt2tex does with complex formatting in the
#h plain text document, namely puts it in a LaTeX verbatim block, at least
#h in the LaTeX version of the documentation.)
#h The default mode is 2046, so the LaTeX backslash is still active. Using
#h ``-ec'' without a following number will escape everything, and ``-ec 0''
#h will escape nothing. Note that mode 1 also fixes a problem with a line
#h that begins with white space and has ``['' as the first non-space
#h character.
#h
#h [-bm|--batchmode]
$batchmode = 0;
#h
#h Makes LaTeX run in its non-stopping mode, i.e. ignores any LaTeX
#h warnings about over-full boxes etc.. Off by default.
#h
#h [-nv|--noverbatim]
#h
#h Stops any output being put in verbatim blocks even if it looks like it
#h is ``preformatted''. This sometimes gives other subroutines a chance
#h to format the data. Off by default.
#h
#h [-sv|--splitverbatim]
$samepageverbatim = "\\samepage ";
# Set $samepageverbatim = "" if verbatim blocks can be split.
#h
#h Use this if verbatim blocks can be split by page breaks; the default is
#h that they cannot.
#h
#h [(-pb|--prebegin) ]
$preformat_trigger_lines = 2;
$preformat_after_blank = 0;
#h
#h Sets the number of preformatted-looking lines (2 by default) needed
#h to begin a verbatim block. The options are:
#h
#h * 0 --- put the entire document in a verbatim block.
#h * 1 --- one trigger line, so even a single line can be put in verbatim.
#h * 2 --- two trigger lines.
#h * 3 --- same as 1, but verbatim blocks can start only after a
#h blank line.
#h
#h Less than 0 is set to 0 and more than 3 is set to 3.
#h
#h [(-pe|--preend) ]
$endpreformat_trigger_lines = 2;
#h
#h Sets the number of non-preformatted-looking lines (2 by default) needed
#h to end a verbatim block. The options are from 0 to 3, with less than
#h 0 set to 0 and more than 3 set to 3. Option 3 has the special meaning
#h of ending the verbatim block on a blank line.
#h
#h NOTE for --prebegin and --preend: If only one is zero, the other is ignored.
#h If both are zero, the entire document is put in a verbatim block.
#h
#h [(-p|--preformat) ]
$verbatim_white_min = 6;
$verbatim_min = 6;
$verbatim_post_min = 3;
#h
#h This option sets the values of the following variables:
#h
#h * $verbatim_white_min (6),
#h * $verbatim_min (6),
#h * $verbatim_post_min (3),
#h
#h where the numbers in () are the defaults. If only one number is given,
#h it sets $verbatim_white_min and $verbatim_min to this value, otherwise it
#h sets the variables in order. A line is considered to be preformatted if
#h either there is a non-space character followed by $verbatim_min non-word
#h characters, or if there are at least $verbatim_white_min spaces after
#h the start of the line and the line contains a non-space character
#h followed by $verbatim_post_min non-word characters.
#h
#h Note that tabs are expanded before these tests.
#h
#h [-ns|--nosectionnumbers]
$nosectionnumbers = "";
#h
#h Do not number LaTeX sections. They may already have numbers, for example,
#h or you may feel that the document looks better without them. In fact, all
#h this really does is add a ``*'' at the end of the headings tags, so if you
#h have changed these tags, be sure that ``-ns'' still makes sense for your
#h tags.
#h
#h [-np|--nopagenumbers]
$nopagenumbers = 0;
#h
#h Do not number LaTeX pages, i.e. set the pagestyle to empty.
#h
#h [(-lm|--listmode) ]
$listmode = 0;
#h
#h Sets the list mode; the bitwise ORed options are:
#h
#h * 0 --- automatically number and label lists, renumbering what appear
#h to be lists with errors. Use standard LaTeX numbering and labelling.
#h * 1 --- keep the original numbers (or letters) on enumerated lists, but
#h put standard labels on itemized lists.
#h * 2 --- turn itemized lists into enumerated lists.
#h * 4 --- hrules end all active lists.
#h * 8 --- easy start. Enumerated lists need not start with 1, A, etc.,
#h which is useful for documents that have headings, diagrams etc. in
#h lists. You would normally use this with list mode 1, to avoid
#h renumbering.
#h * 16 --- turn LaTeX description environments into enumerate; this may
#h sound a strange thing to do, but leads to nice results. Try it!
#h * 32 --- do not nest description environments. Normally a new
#h description starts for every new level of indentation, but this mode
#h switches this feature off.
#h
#h Using ``-lm'' without a following number sets the default mode 0.
#h
#h [(-de|--description) |off]
#h
#h Sets the regular expressions to identify lines that should be put in a LaTeX
#h ``description'' environment. Only the ``first match'' in the regular
#h expression will be used as the ``name'' in the ``description'', and the
#h rest is deleted. So, if you do not want to delete anything, put your
#h regular expression in ``()''. This is off by default, and the default
#h can be reset with the command line option ``-de off''. See the definitions
#h of ``-sw remind'' and ``-sw dict'' for examples.
#h
#h [(-s|--shortline) <[-]num>]
$short_line_length = 40;
$ignore_leading_spaces = 1;
#h
#h Sets the upper bound of the length of a ``short line'' (40 by default),
#h which is assumed to be intentionally this short, so must be kept broken.
#h If the number given is negative, leading spaces are not ignored when
#h determining if a line is ``short''. The default is that leading spaces
#h are ignored.
#h
#h [(-ss|--shortlineskip) ]
$shortlineskip = ""; # Use "" rather than ``null'' here.
#h
#h Sets the vertical skip after a ``short line'', for example try ``-ss 1ex''.
#h The default is a normal line break. The default can be restored by setting
#h it to ``null''.
#h
#h [(-r|--hrule) |off]
$hrule_min = 4;
$hrules_on = 1;
#h
#h If given a number, sets the minimum number of ``==='' etc. for a horizontal
#h rule. The default is 4. If given ``off'', sets $hrules_on = 0, and any
#h hrules found are not printed.
#h
#h [(-sm|--smallmargins) []]
$smallmargins = 0;
#h
#h LaTeX defaults to large margins, but I like small (1in) margins. The
#h bitwise ORed options are:
#h
#h * 0 --- standard LaTeX margins.
#h * 1 --- 1in X margins.
#h * 2 --- 1in Y margins.
#h * 3 --- 1in X and Y margins.
#h
#h The default is 0. If ``-sm'' is not followed by a valid number, then
#h option 3 is set.
#h
#h [(-t|--title) ]
$title = 0;
#h
#h You can specify a title to be placed at the top of the document.
#h
#h [(-tt|--titletag) ]
$titletag = "centerline\{\\LARGE\\bf";
#h
#h Used to set the title tag. The default tag is ``centerline{\LARGE\bf''.
#h
#h [-tf/+tf] | [--titlefirst/--notitlefirst]
$titlefirst = 0;
#h
#h Use the first non-blank line as the title of the document. Off by default.
#h
#h [(-pi|--parindent) ]
$par_indent = 3;
#h
#h Sets the minimum number of spaces indented in first line of a paragraph.
#h This is used only when there's no blank line preceding the paragraph.
#h The default is 3.
#h
#h [(-c|--caps) ]
$min_caps_length = 3;
#h
#h Sets the minimum sequential CAPS for a ``caps line'', which is then put
#h in a special font. For the full definition of a caps line, see the code.
#h The default is 3.
#h
#h [(-ct|--capstag) |off]
$caps_tag = "subsubsection\*";
# Use "" rather than ``off'' here.
#h
#h Sets the tag to put around ``caps lines''. Set it to ``off'' for no
#h caps lines, but note that some of these lines could then be marked as solo
#h lines; if you want to avoid this, set it to ``null'', which is turned into
#h the empty tag. The default tag is ``subsubsection*''.
#h
#h [(-st|--solotag) |off]
$solo_tag = "subsubsection\*\{\\textit";
# Use "" rather than ``off'' here.
#h
#h Sets the tag for ``solo lines'', i.e. lines that have a blank line before
#h and after, and have the ``right'' important-looking ending (see
#h ``sub solo'' for the full definition). The default tag for solo lines is
#h ``subsubsection*{\textit''. Set it to ``off'' for no solo lines.
#h
#h [(-m|--mail) []]
# Bit flags for the mail mode; they can be bitwise ORed together.
($MAILHQ, $MAILCUT, $MAILPAGE, $MAILBODY) = (1, 2, 4, 8);
$MAILDEFAULT = 1;
$mailmode    = $MAILDEFAULT;
#h
#h Used to set the mail mode. The bitwise ORed options are:
#h
#h * 1 --- deal with mail headers and mail quoted text.
#h * 2 --- add half-line width right-flushed hrules at the beginning of
#h new messages. Strange, but easy to see!
#h * 4 --- add a LaTeX ``clearpage'' before each new message.
#h * 8 --- do not print the mail header.
#h
#h ``-m'' without a following number sets the default mail mode of 1. (Any
#h non-zero option also includes option 1.)
#h
#h [-u/+u] | [--unhyphenate/--nounhyphenate]
$unhyphenation = 1;
#h
#h Enables unhyphenation of the raw text, so we can leave hyphenation to
#h LaTeX. On by default.
#h
#h [(-ul|--ulength) ]
$underline_length_tolerance = 1;
#h
#h Sets the underline tolerance for plain text headings, i.e. how much longer
#h or shorter than the text can underlines be and still be underlines. The
#h default is 1.
#h
#h [(-uo|--uoffset) ]
$underline_offset_tolerance = 1;
#h
#h Sets the offset tolerance for underlines of plain text headings. The
#h default is 1.
#h
#h [(-tw|--tabwidth) ]
$tab_width = 8;
#h
#h Sets the width of a tab. The default is 8.
$indent_width = 3;
# Indents this many spaces for each level of a list.
#h
#h [-e/+e] | [--extract/--noextract]
$extract = 0;
#h
#h Sets extract mode for making inserts for other LaTeX documents. Off
#h by default.
#h
#h [(-rs|--ruleset) ]
#h
#h [+rs|--noruleset]
$make_ruleset = 1;
#h
#h By default reads the ruleset in ``.txt2tex-ruleset'' (if it exists),
#h but a different file can be given. When looking for a specified ruleset
#h file, if it fails to find a direct match, it will then try ``file-ruleset''
#h and last of all ``~/.txt2tex-file'', where ``file'' is the given file name.
#h
#h [-ro/+ro] | [--rulesetonly/--norulesetonly]
$ruleset_only = 0;
#h
#h Do no escaping or marking up at all, except for processing the ruleset
#h dictionary file and applying it. This is useful if you want to use
#h txt2tex's rulesetting feature on a LaTeX document. If the LaTeX is a
#h complete document (includes HEAD and BODY) then you will need to use
#h the --extract option also. Off by default.
#h
#h [(-H|--heading) ]
@custom_heading_regexp = ();
#h
#h Used to set regular expressions to pick out custom headings in the plain
#h text. For examples, see the ``switch'' options at the end of txt2tex,
#h in particular ``num''. Header levels are assigned by regexp in the order
#h seen; when a line matches a custom header regexp, it is tagged as
#h a header. If it is the first time that particular regexp has matched,
#h the next available header level is associated with it and applied to
#h the line. Any later matches of that regexp will use the same header level.
#h Therefore, if you want to match numbered header lines, you could use
#h something like this:
#h
#h -H '^ *\d+\. \w+' -H '^ *\d+\.\d+\. \w+' -H '^ *\d+\.\d+\.\d+\. \w+'
#h
#h Then lines like:
#h
#h 2. Examples
#h 2.1. More Examples
#h 2.1.1. Even More Examples
#h
#h would be marked as section, subsection, etc., assuming they were found in
#h that order, and that no other header styles were found. If you prefer
#h that the first heading specified always becomes ``section'', the second
#h always becomes ``subsection'' etc., then use the --explicitheadings option.
#h Also you would probably want the --nosectionnumbers option, to avoid getting
#h two sets of numbers; this could also be fixed using the --trimheadings
#h option (see the definition of ``--switch n'').
#h
#h [(-HT|--headingtags) |shift|number]
$numberheadings = 0;
#h
#h [(-TH|--trimheadings) ]
$trimheadings = "";
#h
#h The sequence of tags for the section headings can be set by something like:
#h ``-HT something,anotherthing,...'' and the headings can be trimmed using
#h ``-TH <regexp>'', i.e. whatever matches ``regexp'' is removed. Note that
#h all headings are trimmed using the same regular expression and that the
#h regular expression is applied after the heading tag and label have been
#h added. The argument of ``-HT'' can also be ``shift'', which shifts the
#h sequence of heading tags down by one, or ``number'', which tells txt2tex
#h (rather than LaTeX) to number the headings (off by default). Remember not
#h to ask LaTeX to number the headings too, if you use ``number''.
#h
#h [-EH/+EH] | [--explicitheadings/--noexplicitheadings]
$explicitheadings = 0;
#h
#h This tells txt2tex not to try to find any headings except the custom ones
#h specified. Also, the custom headings will not be assigned levels in the
#h order they are encountered in the document, but in the order they are
#h specified on the command line. Off by default.
#h
#h [(-db|--debug) ]
$dict_debug = 0;
#h
#h Debug mode for ruleset dictionaries. Bitwise OR what you want to see:
#h
#h * 1 --- the parsing of the dictionary.
#h * 2 --- the code that will make the ruleset.
#
# [-nA] | [--notA]
$notA = 0;
# Lists cannot start with "A", which is good for lists with initials etc..
# Q., A. combinations are trapped by default in this version of txt2tex.
#h
#h [(-tr|--trim) ]
$trim = 0;
#h
#h Used to trim ``n'' characters from the beginning of each line longer than
#h ``n'', or to trim using a regular expression. The default is 0.
#h
#h [(-sw|--switch) ]
#h
#h Used to add sets of command line options that are kept at the bottom
#h of this file. For example ``-sw num'' will help pick out numbered
#h section headings, and ``-sw lynx'' cleans up text files from the lynx
#h browser. Take a look at the definition of ``-sw num'', and see if you
#h can work out what all the options do. Then add some ``-sw'' options
#h of your own. Also see the section on option sets below.
#h
#h [-tc|--twocolumn]
#h
#h Sets LaTeX's ``twocolumn'' option. Off by default. To see what this looks
#h like with 1in margins, take a look at this ``readme'' file in this format
#h by typing ``txt2tex --demo'' on a unix or linux machine.
#h
#h [-ls|--landscape]
#h
#h Sets LaTeX's ``landscape'' option. Off by default.
#h
#h [-sp|--sloppy]
$sloppy = 0;
#h
#h Sets LaTeX's ``sloppy'' option, which is particularly useful for slides.
#h Off by default.
#h
#h [-d|--draft]
#h
#h Save the output in a file called draft.tex. Off by default.
#h
#h [(-h|--help)/--info/--demo]
#h
#h --help gives a short help message listing the options, --info gives a
#h plain text version of the ``readme'' file, and --demo (on a standard
#h unix or linux system) will run the plain text from --info through
#h txt2tex to give a nice LaTeXed version of the ``readme'' file; note that
#h the ``demo'' makes t2t_readme.txt, .tex, .dvi, .aux, and .log.
#h
#h [-v|--version]
#h
#h Prints the txt2tex version number.
$system_ruleset_dict = "/usr/local/lib/txt2tex-ruleset"; # after options
$default_ruleset_dict = "$ENV{'HOME'}/.txt2tex-ruleset"; # before options
# END OF CONFIGURABLE OPTIONS
########################################
########################################
# Definitions (Don't change these)
# Bit flags, one per bit, so that several can be ORed into one value;
# $NONE is the empty set.
$NONE = 0;
($LIST,   $HRULE,      $PAR,       $VERB, $END,     $BREAK,
 $HEADER, $MAILHEADER, $MAILQUOTE, $CAPS, $RULESET, $SOLO)
    = map { 1 << $_ } 0 .. 11;
# A second, separate group of bit flags.
($OL, $UL, $DL) = (1, 2, 4);
# Character entity names
# Maps single characters (given as octal escapes) to the text that
# replaces them.
%char_entities =
(
    "\007" => "",
    "\014" => "\n\n\\clearpage\n\n",
    "\243" => "\\pounds\{\}",
    "\255" => "--",
    "\267" => "\\ensuremath\{\\bullet\{\}\}",
    "\251" => "\\copyright\{\}",
    "\260" => "",                       # degree symbol
);
########################################
########################################
#
# Subroutine definitions
# sampleruleset()
#
# Prints, on the currently selected output handle, a banner naming the
# txt2tex version followed by the text of every line of this script that
# begins with "#H" (upper case only), with the "#H" marker removed.
# Takes no arguments.  If the script file cannot be read, a warning is
# written to STDERR and only the banner is printed.
sub sampleruleset
{
    print "This file was created by txt2tex $version\n\n";
    # The text is embedded in this script itself, so read $0.
    my $script;
    unless (open($script, '<', $0))
    {
        warn "txt2tex: cannot read $0: $!\n";
        return;
    }
    while (my $text = <$script>)
    {
        print "$1\n" if $text =~ /^\#H(.*)$/;
    }
    close($script);
    return;
}
# help()
#
# Prints the full ``readme'' text: every line of this script that begins
# with "#h" or "#H" (the match is case-insensitive), with that marker
# removed.  Output goes to the currently selected handle, not to STDERR.
# Takes no arguments.  If the script file cannot be read, a warning is
# written to STDERR and nothing is printed.
sub help
{
    # The help text is embedded in this script itself, so read $0.
    my $script;
    unless (open($script, '<', $0))
    {
        warn "txt2tex: cannot read $0: $!\n";
        return;
    }
    while (my $text = <$script>)
    {
        print "$1\n" if $text =~ /^\#h(.*)$/i;
    }
    close($script);
    return;
}
# usage()
#
# Prints a short usage message: a fixed "Usage:" line followed by every
# line of this script of the form "#h [..." (the option summaries), with
# the "#h" marker removed.  Output goes to the currently selected handle,
# not to STDERR.  Takes no arguments.  If the script file cannot be read,
# a warning is written to STDERR and only the "Usage:" line is printed.
sub usage
{
    print "Usage\: txt2tex \[options\] \[input file\[s\]\]\n\n";
    # The option summaries are embedded in this script itself, so read $0.
    my $script;
    unless (open($script, '<', $0))
    {
        warn "txt2tex: cannot read $0: $!\n";
        return;
    }
    while (my $text = <$script>)
    {
        print "$1\n" if $text =~ /^\#h( \[.*?)$/;
    }
    close($script);
    return;
}
# hook($hookname, $hookthen, $hookelse)
#
# When $latexhook is true, prints one LaTeX \InputIfFileExists statement
# for the file $hookname.  $hookthen and $hookelse, when true, are wrapped
# in \typeout{} and used as the ``file found'' and ``file not found''
# branches; a false value leaves that branch empty.
sub hook
{
    my ($hookname, $hookthen, $hookelse) = @_;
    if ($latexhook)
    {
        my $found   = $hookthen ? "\\typeout\{$hookthen\}" : "";
        my $missing = $hookelse ? "\\typeout\{$hookelse\}" : "";
        print "\\InputIfFileExists\{$hookname\}\{"
            . $found . "\}\{" . $missing . "\}\n\n";
    }
}
# deal_with_options()
#
# Consumes the leading options from @ARGV and stores their values in the
# script's global settings.  Processing stops at the first argument that
# does not look like an option (it must start with "-" or "+" and have at
# least one more character) or at a bare "--", which is removed.  Whatever
# is left in @ARGV afterwards is for the caller.
#
# Option names may contain any number of "_" and may start with "-" or
# "--".  An unknown option, or an option whose parameter fails its check,
# prints a message and the usage text and exits with status 1.  The
# options -v, -h, -info, -sampleruleset and -demo do their work and exit.
#
# After the loop the two verbatim trigger counts are clamped.
sub deal_with_options
{
    while (($#ARGV > -1) && ($ARGV[0] =~ /^[-+].+/))
    {
        # A bare "--" ends the options.  This has to be tested before the
        # name is normalized below, as that would turn "--" into "-".
        if ($ARGV[0] eq "--")
        {
            shift @ARGV;
            last;
        }
        $ARGV[0] =~ s/_//g;
        # Option names can have "_" to make them more readable.
        $ARGV[0] =~ s/^--/-/;
        # We do not really force long names to start with "--", a "-"
        # will do.
        if (($ARGV[0] eq "-switch" || $ARGV[0] eq "-sw") &&
            $ARGV[1])
        {
            # The option sets are read from the DATA section, which can
            # only be read once.
            die "--switch can only be used once!\n" if $data;
            shift @ARGV;
            my $word = shift @ARGV;
            my @expansion = ();
            while ($line = <DATA>)
            {
                chomp $line;
                if ($line =~ s/\\$//)
                {
                    # A trailing "\" joins the next line on to this one.
                    $line .= <DATA>;
                    redo unless eof(DATA);
                }
                @data = split(/\s+/,$line);
                if ($word eq $data[0])
                {
                    # Keep the options only; the leading word is dropped.
                    unshift(@expansion, @data[1 .. $#data]);
                }
                last if $line =~ /\#===/;
            }
            $data = 1;
            # The continue block shifts one element, so put the word back
            # in front; that way nothing is lost when no set matched.
            unshift(@ARGV, $word, @expansion);
            next;
        }
        if (($ARGV[0] eq "-rs" || $ARGV[0] eq "-ruleset") && $ARGV[1])
        {
            $make_ruleset = 1;
            # Stick it on the end of the list
            push(@ruleset_dictionaries, $ARGV[1]);
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "+rs" || $ARGV[0] eq "-noruleset") )
        {
            $system_ruleset_dict = "";
            $make_ruleset = 0;
            @ruleset_dictionaries = 0;
            next;
        }
        if (($ARGV[0] eq "-H" || $ARGV[0] eq "-heading") && $ARGV[1])
        {
            push(@custom_heading_regexp, $ARGV[1]);
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-headingtags" || $ARGV[0] eq "-HT") && $ARGV[1])
        {
            if ($ARGV[1] eq "shift")
            {
                shift @heading_tag;
            }
            elsif ($ARGV[1] eq "number")
            {
                $numberheadings = 1;
                $nosectionnumbers = "*" if $heading_tag[0] ne "new";
            }
            else
            {
                $nosectionnumbers = "";
                @heading_tag = split(/[,]/, $ARGV[1]);
                # "new" in the first field marks the tags as changed.
                unshift(@heading_tag, "new");
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-EH" || $ARGV[0] eq "-explicitheadings") )
        {
            $explicitheadings = 1;
            next;
        }
        if (($ARGV[0] eq "+EH" || $ARGV[0] eq "-noexplicitheadings") )
        {
            $explicitheadings = 0;
            next;
        }
        if (($ARGV[0] eq "-latexhooks" || $ARGV[0] eq "-lh") && $ARGV[1])
        {
            # Only a pure one- or two-digit number is a mode; anything
            # else (even if it contains digits) is a hook name.
            if ($ARGV[1] =~ /^\d\d?$/)
            {
                $latexhookmode = $ARGV[1];
            }
            else
            {
                $latexhook = $ARGV[1];
                $latexhookmode = 15;
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-r" || $ARGV[0] eq "-hrule")
            && $ARGV[1] =~ /^(\d+|off)$/)
        {
            if ($ARGV[1] =~ /off/)
            {
                $hrules_on = 0;
            }
            else
            {
                $hrule_min = $ARGV[1];
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-s" || $ARGV[0] eq "-shortline")
            && ($ARGV[1] =~ /^\-?\d+$/))
        {
            $short_line_length = $ARGV[1];
            if ($ARGV[1] =~ /^\-/)
            {
                # A negative length means leading spaces are not ignored.
                $short_line_length =~ s/^\-//;
                $ignore_leading_spaces = 0;
            }
            else
            {
                $ignore_leading_spaces = 1;
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-p" || $ARGV[0] eq "-preformat") &&
            $ARGV[1] =~ /^\d+(\,\d+){0,2}$/)
        {
            if ($ARGV[1] =~ /^\d+$/)
            {
                $verbatim_white_min = $ARGV[1];
                $verbatim_min = $verbatim_white_min;
            }
            else
            {
                # Append the current third value, so that it is kept
                # when only two numbers were given.
                $ARGV[1] .= ",$verbatim_post_min";
                ($verbatim_white_min, $verbatim_min, $verbatim_post_min)
                    = split(/\,/, $ARGV[1]);
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-pb" || $ARGV[0] eq "-prebegin") &&
            $ARGV[1] =~ /^-?\d+$/)
        {
            if ($ARGV[1] > 2)
            {
                $preformat_after_blank = 1;
                $preformat_trigger_lines = 1;
            }
            else
            {
                $preformat_after_blank = 0;
                $preformat_trigger_lines = $ARGV[1];
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-pe" || $ARGV[0] eq "-preend") &&
            $ARGV[1] =~ /^-?\d+$/)
        {
            $endpreformat_trigger_lines = $ARGV[1];
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-e" || $ARGV[0] eq "-extract"))
        {
            $extract = 1;
            next;
        }
        if (($ARGV[0] eq "+e" || $ARGV[0] eq "-noextract"))
        {
            $extract = 0;
            next;
        }
        if (($ARGV[0] eq "-c" || $ARGV[0] eq "-caps") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $min_caps_length = $ARGV[1];
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-ct" || $ARGV[0] eq "-capstag") && $ARGV[1])
        {
            $caps_tag = $ARGV[1];
            if ($caps_tag eq "off")
            {
                $caps_tag = "";
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-solotag" || $ARGV[0] eq "-st") && $ARGV[1])
        {
            $solo_tag = $ARGV[1];
            if ($solo_tag eq "off")
            {
                $solo_tag = "";
            }
            shift @ARGV;
            next;
        }
        # The help text documents the long names --unhyphenate and
        # --nounhyphenate; the shorter spellings are kept as well.
        if ($ARGV[0] eq "-u" || $ARGV[0] eq "-unhyphen"
            || $ARGV[0] eq "-unhyphenate")
        {
            $unhyphenation = 1;
            next;
        }
        if ($ARGV[0] eq "+u" || $ARGV[0] eq "-nounhyphen"
            || $ARGV[0] eq "-nounhyphenate")
        {
            $unhyphenation = 0;
            next;
        }
        if (($ARGV[0] eq "-titletag" || $ARGV[0] eq "-tt") && $ARGV[1])
        {
            $titletag = $ARGV[1];
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-t" || $ARGV[0] eq "-title") && $ARGV[1])
        {
            $title = $ARGV[1];
            shift @ARGV;
            next;
        }
        if ($ARGV[0] eq "-tf" || $ARGV[0] eq "-titlefirst")
        {
            $titlefirst = 1;
            next;
        }
        if ($ARGV[0] eq "+tf" || $ARGV[0] eq "-notitlefirst")
        {
            $titlefirst = 0;
            next;
        }
        if (($ARGV[0] eq "-dt" || $ARGV[0] eq "-doctype") && $ARGV[1])
        {
            $doctype = $ARGV[1];
            $doctype = "" if $doctype eq "null";
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-trimheadings" || $ARGV[0] eq "-TH") && $ARGV[1])
        {
            $trimheadings = $ARGV[1];
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-usepackage" || $ARGV[0] eq "-up") && $ARGV[1])
        {
            if ($ARGV[1] eq "off")
            {
                $usepackages = "";
            }
            else
            {
                $usepackages .= "\\usepackage\{" . $ARGV[1] . "\}\n";
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-ul" || $ARGV[0] eq "-ulength")
            && $ARGV[1] =~ /^\d+$/)
        {
            $underline_length_tolerance = $ARGV[1];
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-uo" || $ARGV[0] eq "-uoffset") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $underline_offset_tolerance = $ARGV[1];
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-tw" || $ARGV[0] eq "-tabwidth") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $tab_width = $ARGV[1];
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-pi" || $ARGV[0] eq "-parindent")
            && $ARGV[1] =~ /^\d+$/)
        {
            $par_indent = $ARGV[1];
            shift @ARGV;
            next;
        }
        if ($ARGV[0] eq "-ec" || $ARGV[0] eq "-escapechars")
        {
            # The number is optional; without it everything is escaped.
            if ($ARGV[1] =~ /^\d+$/)
            {
                $escapemode = $ARGV[1];
                shift @ARGV;
            }
            else
            {
                $escapemode = $ESCALL;
            }
            next;
        }
        if ($ARGV[0] eq "-m" || $ARGV[0] eq "-mail")
        {
            if ($ARGV[1] =~ /^\d+$/)
            {
                $mailmode = $ARGV[1];
                shift @ARGV;
            }
            else
            {
                $mailmode = $MAILDEFAULT;
            }
            next;
        }
        if ($ARGV[0] eq "-lm" || $ARGV[0] eq "-listmode")
        {
            if ($ARGV[1] =~ /^\d+$/)
            {
                $listmode = $ARGV[1];
                shift @ARGV;
            }
            else
            {
                $listmode = 0;
            }
            next;
        }
        if ($ARGV[0] eq "-ro" || $ARGV[0] eq "-rulesetonly")
        {
            $ruleset_only = 1;
            next;
        }
        if ($ARGV[0] eq "+ro" || $ARGV[0] eq "-norulesetonly")
        {
            $ruleset_only = 0;
            next;
        }
        if ($ARGV[0] eq "-v" || $ARGV[0] eq "-version")
        {
            print "\nKalvis M. Jansons's txt2tex $version\n\n";
            exit;
        }
        if ($ARGV[0] eq "-h" || $ARGV[0] eq "-help")
        {
            usage();
            exit;
        }
        if ($ARGV[0] eq "-info")
        {
            help();
            exit;
        }
        if (($ARGV[0] eq "-db" || $ARGV[0] eq "-debug") && $ARGV[1] =~ /^\d+$/)
        {
            $dict_debug = $ARGV[1];
            shift @ARGV;
            next;
        }
        if ($ARGV[0] eq "-sampleruleset")
        {
            sampleruleset();
            exit;
        }
        if ($ARGV[0] eq "-batchmode" || $ARGV[0] eq "-bm")
        {
            $batchmode = 1;
            next;
        }
        if ($ARGV[0] eq "-sloppy" || $ARGV[0] eq "-sp")
        {
            $sloppy = 1;
            next;
        }
        if ($ARGV[0] eq "-draft" || $ARGV[0] eq "-d")
        {
            # From here on, plain prints go to draft.tex.
            open(DRAFT, ">draft.tex")
                or die "txt2tex: cannot write draft.tex: $!\n";
            select DRAFT;
            next;
        }
        if ($ARGV[0] eq "-smallmargins" || $ARGV[0] eq "-sm")
        {
            if ($ARGV[1] =~ /^[0-3]$/)
            {
                $smallmargins = $ARGV[1];
                shift @ARGV;
            }
            else
            {
                $smallmargins = 3;
            }
            next;
        }
        if ($ARGV[0] eq "-nopagenumbers" || $ARGV[0] eq "-np")
        {
            $nopagenumbers = 1;
            next;
        }
        if ($ARGV[0] eq "-nosectionnumbers" || $ARGV[0] eq "-ns")
        {
            $nosectionnumbers = "*";
            next;
        }
        if ($ARGV[0] =~ /^-1[0-2](pt)?$/)
        {
            # Font size: replace an existing size in the doctype's option
            # list, or append one (making the "[]" list first if needed).
            $ARGV[0] =~ s/^-//;
            $ARGV[0] .= "pt" if $ARGV[0] =~ /^1\d$/;
            $doctype =~ s/(class|style)/$1\[\]/ unless $doctype =~ /\[/;
            $doctype =~ s/\d+pt/$ARGV[0]/ || $doctype =~ s/\]/\,$ARGV[0]\]/;
            $doctype =~ s/\[\,/\[/;
            next;
        }
        if ($ARGV[0] =~ /^-(landscape|ls|twocolumn|tc)$/)
        {
            my $op = $ARGV[0];
            $op =~ s/^-//;
            if ($op =~ /tc/)
            {
                $op = "twocolumn";
            }
            elsif ($op =~ /ls/)
            {
                $op = "landscape";
            }
            # Add the option to the doctype's option list, once only.
            $doctype =~ s/(class|style)/$1\[\]/ unless $doctype =~ /\[/;
            $doctype =~ s/\]/,$op\]/ unless $doctype =~ /$op/;
            $doctype =~ s/\[\,/\[/;
            next;
        }
        if (($ARGV[0] eq "-description" || $ARGV[0] eq "-de") && $ARGV[1])
        {
            if ($ARGV[1] ne "off")
            {
                $description = $ARGV[1];
            }
            else
            {
                $description = 0;
            }
            shift @ARGV;
            next;
        }
        if (($ARGV[0] eq "-trim" || $ARGV[0] eq "-tr") && $ARGV[1])
        {
            if ($trim)
            {
                if ($trim =~ /^\d+$/)
                {
                    # A previous numeric trim is simply replaced.
                    $trim = $ARGV[1];
                }
                else
                {
                    # A previous regexp is combined with the new value.
                    $trim = "(" . $trim . "|" . $ARGV[1] . ")";
                }
            }
            else
            {
                $trim = $ARGV[1];
            }
            shift @ARGV;
            next;
        }
        if ($ARGV[0] eq "-noverbatim" || $ARGV[0] eq "-nv")
        {
            $preformat_trigger_lines = 2;
            $endpreformat_trigger_lines = 0;
            next;
        }
        if ($ARGV[0] eq "-splitverbatim" || $ARGV[0] eq "-sv")
        {
            $samepageverbatim = "";
            next;
        }
        if ($ARGV[0] eq "-notA" || $ARGV[0] eq "-nA")
        {
            $notA = 1;
            next;
        }
        if ($ARGV[0] eq "-demo")
        {
            # Run this script's own readme through txt2tex and view it.
            # NOTE(review): $0 goes into a shell command unquoted.
            my $demo_command = "$0 --info|tee t2t_readme.txt|\n"
                . "$0 -sw demo > t2t_readme.tex;\n"
                . "pdflatex t2t_readme.tex; xpdf t2t_readme.pdf || open t2t_readme.pdf|";
            open(DEMO, $demo_command)
                or die "txt2tex: cannot run the demo: $!\n";
            while (<DEMO>)
            {
                print;
            }
            exit;
        }
        if (($ARGV[0] eq "-shortlineskip" || $ARGV[0] eq "-ss")
            && $ARGV[1] =~ /^(\d|null)/)
        {
            if ($ARGV[1] eq "null")
            {
                $shortlineskip = "";
            }
            else
            {
                $shortlineskip = "\[$ARGV[1]\]";
            }
            shift @ARGV;
            next;
        }
        print STDERR "Unrecognized option: $ARGV[0]\n";
        print STDERR " or bad parameter: $ARGV[1]\n" if($ARGV[1]);
        usage();
        exit(1);
    }
    continue
    {
        # Every successful branch ends with "next", which lands here to
        # remove the option name (or, for options that took a parameter
        # and already shifted once, the parameter).
        shift @ARGV;
    }
    $preformat_trigger_lines = 0 if ($preformat_trigger_lines < 0);
    $endpreformat_trigger_lines = 1 if ($preformat_trigger_lines == 0);
    $endpreformat_trigger_lines = 0 if ($endpreformat_trigger_lines < 0);
    $endpreformat_trigger_lines = 3 if ($endpreformat_trigger_lines > 3);
}
# Tell whether the given string is empty or consists solely of whitespace.
sub is_blank {
    my ($text) = @_;
    return $text =~ /^\s*$/;
}
# Escape braces and backslashes in the global $line so that LaTeX prints them
# literally.  Works in place on $line; takes no arguments.
sub escape {
    my $plain_math = $escapemode & $ESCDOLLARS;
    for ($line) {
        # Braces first, so the backslashes just added can be told apart
        # from backslashes that were in the text.
        s/\{/\\\{/g;
        s/\}/\\\}/g;
        # Every backslash that is not one of the brace escapes becomes a
        # \backslash symbol; it is wrapped in an \mbox unless the
        # $ESCDOLLARS bit is set in $escapemode.
        if ($plain_math) {
            s/\\(?!(\{|\}))/\\\(\\backslash\\\)/g;
        }
        else {
            s/\\(?!(\{|\}))/\{\\mbox\{\\\(\\backslash\\\)\}\}/g;
        }
        # Put an empty group in front of a leading "[" .
        s/^(\s*)(\[)/$1\{\}\[/;
    }
}
# Recognise a line made only of rule characters and turn the global $line
# into a LaTeX fill command (or into an empty line when $hrules_on is off).
sub hrule {
    # Do hrules end lists?  When bit 4 of $listmode is set and the previous
    # line was a rule, close every open list and ask for one blank line.
    # (The rule then has the list-text width rather than the width of the
    # following text; change this if you prefer otherwise.)
    if ($listnum && ($listmode & 4) && ($prev_action & $HRULE)) {
        endlist($listnum);
        $clearlines++;
    }

    # At least $hrule_min rule characters are required for either kind.
    my $fill =
          $line =~ /^\s*(\+?[-_~=]\+?\s*){$hrule_min,}$/ ? "\\hrulefill\n"
        : $line =~ /^\s*([+\*\.]\s*){$hrule_min,}$/      ? "\\dotfill\n"
        :                                                  "";
    return unless $fill;

    if ($hrules_on) {
        $line = $fill;
        $line_action |= $HRULE;
    }
    else {
        $line = "\n";
    }
}
# Force a LaTeX line break at the end of $prev when the previous line was
# short.  Reads and updates the globals $prev and $prev_action.
sub shortline {
    return if $mode & $LIST;
    return if is_blank($line) || is_blank($prev);
    return if $prev_line_length - $prev_indent * $ignore_leading_spaces
        > $short_line_length;
    return if $line_action & ($END | $HEADER | $HRULE | $LIST | $PAR);
    return if $prev_action & ($HEADER | $HRULE | $BREAK | $CAPS | $SOLO);

    # Take the final character off $prev, add the break, then put the
    # character back so the break sits in front of it.
    my $last_char = chop($prev);
    $prev .= "\\\\$shortlineskip" . $last_char;
    $prev_action |= $BREAK;
}
# Mark up mail and news text: quoted lines, the first header line of a
# message, and the header lines that follow it.  Works on the globals
# $line, $prev and $line_action.
sub mailstuff {
    if (($line =~ /^\w*>/ || $line =~ /^\w*\|/) && !is_blank($nextline)) {
        # Quoted text: keep each quoted line on a line of its own.
        $line =~ s/$/\\\\/;
        $line_action |= ($BREAK | $MAILQUOTE);
        unless ($prev_action & ($BREAK | $PAR)) {
            $prev .= "\n";
            $line_action |= $PAR;
        }
    }
    elsif ($line =~ /^(From:? .*\@)|^(Newsgroups: )/ && is_blank($prev)) {
        # First header line of a new message; open lists are closed first.
        endlist($listnum) if $listnum;
        chop $line;
        $line .= "\\\\\n";
        $prev .= "\\subsection*\{\\hfill\\hrulefill\}\n"
            if $mailmode & $MAILCUT;
        $prev = "\\clearpage\n" . $prev if $mailmode & $MAILPAGE;
        $line_action |= ($BREAK | $MAILHEADER | $PAR);
    }
    elsif (($line =~ /^[\w\-]*:/ || $line =~ /^\s+\S/)
        && ($prev_action & $MAILHEADER)
        && !is_blank($nextline))
    {
        # Either "Some-Header: blah" or an indented continuation of a
        # multi-line header, directly after another header line.
        $line =~ s/$/\\\\/;
        $line_action |= ($BREAK | $MAILHEADER);
    }
}
# Final small clean-ups on the globals $prev and $line.  Keep this short:
# anything more elaborate belongs in a .txt2tex-ruleset file.
sub tidy {
    # Drop a forced line break (with optional 5-character skip) that ended
    # up directly after a \begin{...} at the end of $prev.
    $prev =~ s/(\\begin\{.*?\}) *\\\\(\[.{5}\])?$/$1/g;

    for ($line) {
        # Put email addresses in tt font
        s/(([\w-]+(\.[\w-]+)*\%)*[\w-]+(\.[\w-]+)*\@[\w-]+(\.[\w-]+)*\.[\w-]{2,3}(?![\w-]))/\\texttt\{$1\}/g;
        # Control space after i.e., e.g., etc. and viz.
        s/(\b)(i\.e\.|e\.g\.|etc\.|viz\.)[^\S\n]+/$1$2\\ /g;
        # The LaTeX logo, unless the word is already preceded by a backslash.
        s/([^\\])LaTeX/$1\\LaTeX\{\}/g;
        # Uncomment the next line to make isolated "-"s into m-rules.
        # s/ - / --- /g;
        s/\{\{\}/\{/g;
        # A footnote inside a numbered section heading crashes LaTeX
        # because of the table of contents entry, so give the heading an
        # empty optional argument to remove that entry.
        s/(\\(sub)*(section|paragraph))(?![\*\[])/$1\[\]/ if /\\footnote/;
    }
}
# Return $vector with every mode bit listed in $mask switched off.
sub subtract_modes {
    my ($vector, $mask) = @_;
    # Setting the masked bits first guarantees the subtraction removes
    # exactly those bits and nothing else.
    my $with_mask = $vector | $mask;
    return $with_mask - $mask;
}
# Flag the current line as the start of a paragraph ($PAR in $line_action)
# when it follows a blank line or is indented at least $par_indent further
# than the previous line.
sub paragraph {
    return if is_blank($line);
    # Any action other than $END, $MAILQUOTE or $BREAK rules the line out.
    return if subtract_modes($line_action, $END | $MAILQUOTE | $BREAK);
    if (is_blank($prev) || $line_indent >= $prev_indent + $par_indent) {
        $line_action |= $PAR;
    }
}
# Return the number of spaces in front of the first non-space character of
# the first argument.  For a blank line the second argument is returned
# unchanged instead.
sub count_indent {
    my ($text, $fallback) = @_;
    return $fallback if is_blank($text);
    my ($leading) = $text =~ /^( *)[^ ]/;
    return length($leading);
}
# Examine a line for a list-item prefix.
# Returns ($prefix, $number, $rawprefix, $name), or (0,0,0) when the line
# does not start a list item:
#   $prefix    - the prefix in canonical form
#   $number    - the item's number or letter; 0 for a bullet
#   $rawprefix - the prefix exactly as it appears in the text
#   $name      - first capture of $description (description items only)
sub listprefix {
    my ($text) = @_;
    my ($prefix, $number, $rawprefix, $name);

    if ($description && $text =~ /^\s*$description/) {
        ($prefix, $name) = $text =~ /^(\s*)$description/;
        $name =~ s/\s*$//;
        $name =~ s/^\s*//;
        return (0, 0, 0) unless $name;
        $prefix = "" if $listmode & 32;
        $prefix .= "DES";
    }
    elsif ($text =~ /^\s*[\(\[]?(\d+|[^\W\d_]|[ivx]+|[IVX]+)[\.\)\]:]\s+\S/) {
        ($number)    = $text =~ /(\w+)/;
        ($rawprefix) = $text =~ /^(\s*\W?\w+\W)/;
        $prefix = $rawprefix;
        # Canonical form: numbers --> 1, [a-z]+ --> a and [A-Z]+ --> A.
        # Only the first rewrite that applies is made.
        $prefix =~ s/\d+/1/
            || $prefix =~ s/[a-z]+/a/
            || $prefix =~ s/[A-Z]+/A/;
    }
    elsif ($text =~ /^\s*[-+=\*o\267]{1,2}\s+\S/) {
        $number = 0;
        ($rawprefix) = $text =~ /^(\s*\S{1,2}\s)/;
        $prefix = $rawprefix;
    }
    else {
        return (0, 0, 0);
    }
    return ($prefix, $number, $rawprefix, $name);
}
# Open a new list one level deeper than the current one.
# Arguments are the four values produced by listprefix.  Returns 1 when a
# list was opened and 0 when the item is rejected as a list start.
sub startlist {
    my ($prefix, $number, $rawprefix, $name) = @_;
    $listprefix[$listnum] = $prefix;
    my $environment = "enumerate\}\n";

    if ($number) {
        # Be careful if the list begins with an A and we have seen a Q,
        # as in FAQs.
        $notA-- if $notA ne "1" && $number eq "Q";

        # A numbered list has to start with 1, a, A, i or I, unless bit 8
        # of $listmode is set; "A" only counts while $notA is false.
        my $acceptable = ($listmode & 8)
            || $number eq "1"
            || $number eq "a"
            || ($number eq "A" && !$notA)
            || $number eq "i"
            || $number eq "I";
        unless ($acceptable) {
            $notA++ if $number eq "A" && $notA < 0;
            return 0;
        }
        $list[$listnum] = $OL;
    }
    elsif ($name) {
        $environment = "description\}\n" unless $listmode & 16;
        $list[$listnum] = $DL;
    }
    else {
        $environment = "itemize\}\n" unless $listmode & 2;
        $list[$listnum] = $UL;
    }

    my $opening = "$list_indent\\begin\{" . $environment;
    if ($prev =~ /\A\s*\Z/) {
        $prev = $opening;
        $clearlines++;    # Retain 1 blank line where blanks are found
    }
    else {
        $prev .= $opening;
    }

    $listnum++;
    $list_indent = " " x $listnum x $indent_width;
    $line_action |= $LIST;
    $mode |= $LIST;
    return 1;
}
# Close the N innermost open lists, appending the matching \end{...} lines
# to $prev.  N is the only argument.
sub endlist {
    my ($remaining) = @_;

    # Avoid white space above ``end list''
    $prev = "" if $prev =~ /\A\s*\Z/;

    while ($remaining > 0) {
        $list_indent = " " x ($listnum - 1) x $indent_width;
        my $kind = $list[$listnum - 1];
        # Bits 2 and 16 of $listmode select the enumerate environment for
        # unordered and description lists respectively.
        if ($kind == $UL && !($listmode & 2)) {
            $prev .= "$list_indent\\end\{itemize\}\n";
        }
        elsif ($kind == $DL && !($listmode & 16)) {
            $prev .= "$list_indent\\end\{description\}\n";
        }
        else {
            $prev .= "$list_indent\\end\{enumerate\}\n";
        }
        $remaining--;
        $listnum--;
    }

    unless ($listnum) {
        $mode ^= $LIST;
        $clearlines = 0;    # Avoid white space above ``end list''
    }
    $line_action |= $END;
}
# Rewrite the global $line as an \item of the innermost open list.
# The arguments are not used; $name and $number are the dynamically scoped
# values set up by the caller.
sub continuelist {
    my $kind = $list[$listnum - 1];

    if ($kind == $UL) {
        $line =~ s/^\s*\S{1,2}\s*/$list_indent\\item /;
    }
    elsif ($kind == $DL) {
        $line =~ s/^\s*$description\s*//;
        if (!$line) {
            # Nothing follows the item name on this line.
            $line = "\\ " unless $listmode & 16;
            $line .= "\n";
        }
        $line = "$list_indent\\item\[$name\] " . $line;
    }
    elsif ($kind == $OL) {
        $line =~ s/^\s*\W?\w+\W\s*//;
        # Bit 1 of $listmode keeps the number found in the text.
        my $item = ($listmode & 1)
            ? "$list_indent\\item\[$number\.\] "
            : "$list_indent\\item ";
        $line = $item . $line;
    }
    $line_action |= $LIST;
}
# Decide how the current line relates to the open lists: it may start a new
# list, continue one, return to an outer one, or end them all.
sub liststuff {
    # These must stay "local": continuelist reads $name and $number from
    # the dynamic scope rather than from its arguments.
    local ($prefix, $number, $rawprefix, $name) = listprefix($line);

    unless ($prefix) {
        return unless is_blank($prev);    # inside a list item
        # This is not a list, so end them all.
        endlist($listnum) if $listnum;
        return;
    }

    # If numbers with more than one digit grow to the left instead of
    # to the right, the prefix will shrink and we'll fail to match the
    # right list. We need to account for this.
    my $digits = length("" . $number);
    my $prefix_alternate;
    $prefix_alternate = (" " x ($digits - 1)) . $prefix if $digits > 1;

    # Maybe we're going back up to a previous list
    my $i = $listnum - 1;
    while ($i >= 0 && $prefix ne $listprefix[$i]) {
        last if $digits > 1 && $prefix_alternate eq $listprefix[$i];
        $i--;
    }

    # Measure the indent from where the text starts, not where the
    # prefix starts. This won't screw anything up, and if we don't do
    # it, the next line might appear to be indented relative to this
    # line, and get tagged as a new paragraph.
    # Of course, we only use it if it really turns out to be a list.
    my ($total_prefix) = $line =~ /^(\s*[\w-+=\*o\267]+.\s*)/;

    my $islist = 1;
    $i++;    # now the number of lists that stay open
    if ($i > 0 && $i != $listnum) {
        endlist($listnum - $i);
        $islist = 0;
    }
    elsif (!$listnum || $i != $listnum) {
        # We have something like this: "- foo" which usually
        # turns out not to be a list.
        return
            unless $line_indent > 0
            || is_blank($prev)
            || ($prev_action & ($BREAK | $HEADER));
        $islist = startlist($prefix, $number, $rawprefix, $name);
    }

    continuelist($prefix, $number, $rawprefix, $name) if $mode & $LIST;
    $line_indent = length($total_prefix) if $islist;
}
# Returns true if the passed string is considered to be preformatted:
# either it has a run of at least $verbatim_min non-word characters inside
# it, or it starts with at least $verbatim_white_min whitespace characters
# and has an inner run of at least $verbatim_post_min non-word characters.
# The patterns are compiled once only (/o), so later changes to these
# settings have no effect.
sub is_preformatted {
    my ($text) = @_;
    return 1 if $text =~ /\S[\W_]{$verbatim_min,}\S+/o;
    return $text =~ /^\s{$verbatim_white_min,}\S+/o
        && $text =~ /\S[\W_]{$verbatim_post_min,}\S+/o;
}
# Close the verbatim environment when the current line ends the
# preformatted block.  $endpreformat_trigger_lines selects the test:
#   3   - a blank line ends it
#   1   - a line that is not preformatted ends it
#   0,2 - such a line ends it only if the next line is not preformatted either
sub endpreformat {
    my $finished;
    if ($endpreformat_trigger_lines == 3) {
        $finished = is_blank($line);
    }
    elsif ($endpreformat_trigger_lines < 3) {
        $finished = !is_preformatted($line)
            && ($endpreformat_trigger_lines == 1
                || !is_preformatted($nextline));
    }
    return unless $finished;

    $prev .= "\\end\{verbatim\}\n";
    $mode ^= ($VERB & $mode);    # clears the $VERB bit
    $line_action |= $END;
}
# Open a verbatim environment in front of the current line when it starts a
# preformatted block.  With $preformat_trigger_lines at 0 every line
# qualifies, at 1 the line itself must look preformatted, otherwise the
# next line must as well.
sub preformat {
    my $triggered = $preformat_trigger_lines == 0
        || (is_preformatted($line)
            && ($preformat_trigger_lines == 1 || is_preformatted($nextline)));
    return unless $triggered;
    # Optionally insist on a blank line in front of the block.
    return unless (is_blank($prev) || !$preformat_after_blank);

    $line =~ s/^/\\begin\{verbatim\}\n/;
    $mode |= $VERB;
    $line_action |= $VERB;
}
# Produce the next label for a heading of the given level, e.g. "2.1.3".
# Level 0 (or none) means "not a heading" and yields the next value of a
# separate running counter instead.
sub make_new_label {
    my ($heading_level) = @_;
    return sprintf("%d", $non_header_label++) unless $heading_level;

    $heading_count[$heading_level - 1]++;

    # Reset lower order counters
    for my $deeper ($heading_level .. $#heading_count) {
        $heading_count[$deeper] = 0;
    }

    my @numbers;
    for my $idx (0 .. $heading_level - 1) {
        $heading_count[$idx] ||= 1;    # In case they skip any
        push @numbers, sprintf("%d", $heading_count[$idx]);
    }
    return join(".", @numbers);
}
# Finish off a heading held in the global $line: close any open lists, fill
# in the heading number and attach a \label.  The argument is the heading
# level.
sub label_heading {
    endlist($listnum) if $listnum;
    my ($level) = @_;
    my $label = make_new_label($level);

    # "=n=" marks the place where the number goes.
    $line =~ s/=n=/$label/ if $numberheadings;
    $line =~ s/$/\\label\{sec$label\}/;
    $line =~ s/$trimheadings//g if $trimheadings;
}
# Map a heading style to its level.  Styles are numbered 1, 2, 3, ... in
# the order in which they are first seen.
sub heading_level {
    my ($style) = @_;
    $heading_styles{$style} ||= ++$num_heading_styles;
    return $heading_styles{$style};
}
# Treat the current line as a heading when the next line underlines it:
# the underline has to agree with the heading in length and offset to
# within the configured tolerances.
sub heading {
    my ($hoffset, $heading) = $line =~ /^(\s*)(.+)$/;
    $hoffset = "" unless defined $hoffset;
    $heading = "" unless defined $heading;

    my ($uoffset, $underline) = $nextline =~ /^(\s*)(\S+)\s*$/;
    $uoffset   = "" unless defined $uoffset;
    $underline = "" unless defined $underline;

    my $lendiff    = abs(length($heading) - length($underline));
    my $offsetdiff = abs(length($hoffset) - length($uoffset));
    return
        if is_blank($line)
        || $lendiff > $underline_length_tolerance
        || $offsetdiff > $underline_offset_tolerance;

    # The style is the underline character; call it a different style if
    # the heading is in all caps.
    $underline = substr($underline, 0, 1);
    $underline .= "C" if iscaps($line);

    $nextline = getline();    # Eat the underline
    $heading_level = heading_level($underline);
    $line = "=n= " . $line if $numberheadings;    # Mark where the number will go
    tagline("$heading_tag[$heading_level]$nosectionnumbers");
    label_heading($heading_level);
    $line_action |= $HEADER;
}
# Turn the current line into a heading when it matches one of the patterns
# in @custom_heading_regexp.  Only the first matching pattern is used.
sub custom_heading {
    for my $i (0 .. $#custom_heading_regexp) {
        next unless $line =~ /$custom_heading_regexp[$i]/;

        # With explicit headings the position of the pattern gives the
        # level; otherwise each pattern is a style of its own.
        my $level = $explicitheadings
            ? $i + 1
            : heading_level("Cust" . $i);

        $line = "=n= " . $line if $numberheadings;    # Mark number slot
        tagline("$heading_tag[$level]$nosectionnumbers");
        label_heading($level);
        $line_action |= $HEADER;
        last;
    }
}
# Join a word that was hyphenated across the line end: the first word of
# $nextline (with any punctuation and whitespace that follows it) is moved
# to the end of $line in place of the hyphen.
sub unhyphenate {
    my ($second) = $nextline =~ /^\s*([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/;
    $nextline =~ s/^(\s*)[^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*/$1/;

    # Fetch a fresh line if that used up the whole of the next one.
    $nextline = &getline if $nextline eq "";

    $line =~ s/\-\s*$/$second/;
    $line .= "\n";
}
# Return a copy of the string in which every tab is replaced by the number
# of spaces needed to reach the next multiple of $tab_width.
sub untabify {
    my ($text) = @_;
    # One tab per pass, leftmost first, so the offset of the match is
    # already measured in expanded columns.
    1 while $text =~ s/\011/" " x ($tab_width - ($-[0] % $tab_width))/e;
    return $text;
}
# Wrap a string in a LaTeX command: tagit("textbf", " text ") gives
# "\textbf{text}".  Surrounding whitespace is dropped.  An empty tag, or
# one containing "null", returns the string unchanged.
sub tagit {
    my ($tag, $text) = @_;
    return $text if !$tag || $tag =~ /null/;

    $text =~ s/\A\s*(.*?)\s*\Z/\\$tag\{$1/;

    # A tag may itself open groups, as in "textbf{\Large"; close those as
    # well as the one opened above.
    my $unclosed = ($tag =~ tr/{//) - ($tag =~ tr/}//);
    return $text . ("\}" x $unclosed) . "\}";
}
# Wrap the global $line in the given tag and end it with a newline.
sub tagline {
    my ($tag) = @_;
    my $tagged = tagit($tag, $line);
    $line = $tagged . "\n";
}
# True when the string has no lower-case letters and contains a run of at
# least $min_caps_length capitals.  Only the ASCII letters are considered;
# you may wish to add some characters to fit your needs.
sub iscaps {
    my ($text) = @_;
    return $text =~ /^[^a-z]*[A-Z]{$min_caps_length,}[^a-z]*$/;
}
# Tag an all-capitals line with $caps_tag, provided the next line is not in
# capitals too and the line follows a blank line or the end of a block.
sub caps {
    return unless iscaps($line) && !iscaps($nextline);
    return unless (is_blank($prev) || ($line_action & $END));
    tagline($caps_tag);
    $line_action |= $CAPS;
}
# Handle a line that stands alone between blank lines and does not end like
# a sentence: wide gaps become fills, and unless the line contains leaders
# it is tagged with $solo_tag.
sub solo {
    return if is_blank($line) || ($line_action & $CAPS);
    return unless is_blank($nextline);
    return unless (is_blank($prev) || ($line_action & $END));
    return if $line =~ /^\s*\\/;
    # You might want to add "?" to the list below
    return if $line =~ /([^\.]\.|[\:\;\,\!\-\'\"]|\))\s*$/;

    for ($line) {
        s/^ *//g;
        s/ *$//g;
        s/ {3,}/\\hfill\{\}/g;
    }
    if ($line =~ /(\.{4,}|\_{4,})/) {
        # Runs of dots or underscores become leaders; no tag is added.
        $line =~ s/\.{4,}/\\dotfill\{\}/g;
        $line =~ s/\_{4,}/\\hrulefill\{\}/g;
    }
    else {
        tagline($solo_tag);
    }
    $line_action |= $SOLO;
}
# Convert very simple globs to regexps.
#   ?   matches any single character
#   *   matches any run of characters
#   \x  matches the character x literally
# Characters other than word characters, [ ] * ? | and \ are matched
# literally; [ ] and | are passed through to the regexp unchanged.
# The result is wrapped in \b ... \b so that it matches whole words only.
sub glob2regexp
{
    my ($glob) = @_;
    # Escape funky chars
    $glob =~ s/[^\w\[\]\*\?\|\\]/\\$&/g;

    my $regexp  = "";
    my $escaped = 0;
    for my $char (split //, $glob) {
        if ($escaped) {
            # Keep the backslash together with the character it protects.
            # (It used to be dropped, which turned "a.b" into a pattern
            # matching any character and "\*" into a quantifier.)
            $regexp .= "\\" . $char;
            $escaped = 0;
            next;
        }
        if ($char eq "\\") {
            # Nothing is emitted yet, so a dangling backslash at the very
            # end of the glob is ignored.
            $escaped = 1;
            next;
        }
        if ($char eq "?") {
            $regexp .= ".";
            next;
        }
        if ($char eq "*") {
            $regexp .= ".*";
            next;
        }
        $regexp .= $char;    # Normal character
    }
    return "\\b" . $regexp . "\\b";
}
# Enter a rule into the ruleset tables.
#   $key       - the pattern
#   $short_cut - the replacement
#   $switches  - the rule's option bits
# A key that already has a (true) entry is skipped.  Bit 1 of $dict_debug
# reports what happens on STDERR.
sub add_regexp_to_ruleset_table {
    my ($key, $short_cut, $switches) = @_;
    my $verbose = $dict_debug & 1;

    # No sense adding a second one if it's already in there
    if ($ruleset_table{$key}) {
        if ($verbose) {
            print STDERR " Skipping entry. Key already in table.\n";
            print STDERR "\tKEY: $key\n\tVALUE: $short_cut\n\n";
        }
        return;
    }

    # Keep track of the order they were added so we can
    # look for matches in the same order
    push @ruleset_table_order, $key;
    $ruleset_table{$key}        = $short_cut;    # Put it in The Table
    $ruleset_switch_table{$key} = $switches;
    print STDERR
        " ($#ruleset_table_order)\tKEY: $key\n\tVALUE: $short_cut\n\tSWITCHES: $switches\n\n"
        if $verbose;
}
# Enter a rule whose key is literal text: every non-word character is
# escaped and the result is wrapped in word boundaries.
sub add_literal_to_ruleset_table {
    my ($literal, $short_cut, $switches) = @_;
    (my $pattern = $literal) =~ s/(\W)/\\$1/g;
    add_regexp_to_ruleset_table("\\b$pattern\\b", $short_cut, $switches);
}
# Enter a rule whose key is a simple glob; the glob is converted to a
# regexp first.
sub add_glob_to_ruleset_table {
    my ($glob, $short_cut, $switches) = @_;
    my $pattern = glob2regexp($glob);
    add_regexp_to_ruleset_table($pattern, $short_cut, $switches);
}
# This is the only function that you would need to change, if you were to
# use a different dictionary file format.
#
# Parse the text of a ruleset file and enter every rule into the ruleset
# tables.
#   $dictfile - name of the file (used in messages only)
#   $dict     - the complete contents of the file
# A rule has the form "key -OPTIONS-> replacement" on a single line.  The
# key is a regexp when it starts with "/" or "|", a literal when it starts
# with a double quote, and a simple glob otherwise.
# Dies when two consecutive "->" are found.
sub parse_dict
{
    my ($dictfile, $dict) = @_;
    print STDERR "Parsing dictionary file $dictfile\n" if ($dict_debug & 1);

    # $dict holds the whole file, so /m is needed for ^ and $ to work line
    # by line; without it commented-out rules are not removed.
    $dict =~ s/^\#.*$//gm;           # Strip lines that start with '#'
    $dict =~ s/^.*[^\\]:\s*$//gm;    # Strip lines that end with unescaped ':'

    if ($dict =~ /->\s*->/) {
        $message = "Two consecutive '->'s found in $dictfile\n";
        # Print out any useful context so they can find it.
        ($near) = $dict =~ /([\S ]*\s*->\s*->\s*\S*)/;
        $message .= "\n$near\n" if $near =~ /\S/;
        die $message;
    }

    while ($dict =~ /\s*(.+)\s+\-+([\-ieofFt]+\-+)?\>\s*(.*\S+)\s*\n/ig) {
        my ($key, $options, $short_cut) = ($1, $2, $3);
        $options = "" unless defined($options);

        my $switches = 0;
        $switches += 1  if $options =~ /i/;    # Case insensitivity
        $switches += 2  if $options =~ /e/;    # Evaluate as Perl code
        $switches += 4  if $options =~ /o/;    # Do only once
        $switches += 8  if $options =~ /f/;    # Footnote only
        $switches += 16 if $options =~ /F/;    # Footnote plus triggering text
        $switches += 32 if $options =~ /t/;    # Tag the text in ()

        $key =~ s/\s*$//;                      # Chop trailing whitespace
        if ($key =~ m|^/|) {                   # Regexp
            $key = substr($key, 1);
            $key =~ s|/$||;      # Allow them to forget the closing /
            &add_regexp_to_ruleset_table($key, $short_cut, $switches);
        }
        elsif ($key =~ /^\|/) {                # alternate regexp format
            $key = substr($key, 1);
            $key =~ s/\|$//;     # Allow them to forget the closing |
            $key =~ s|/|\\/|g;   # Escape all slashes
            &add_regexp_to_ruleset_table($key, $short_cut, $switches);
        }
        elsif ($key =~ /^\"/) {                # Quoted literal
            # Anchored like the tests above: only a LEADING quote marks a
            # literal, since the first character is about to be removed.
            $key = substr($key, 1);
            $key =~ s/\"$//;     # Allow them to forget the closing "
            &add_literal_to_ruleset_table($key, $short_cut, $switches);
        }
        else {
            &add_glob_to_ruleset_table($key, $short_cut, $switches);
        }
    }
}
sub make_dictionary_ruleset_code
{
my ($i,$pattern,$switches,$options,$code,$rule_item);
$code = <);
close(DICT);
&parse_dict($dict, $contents);
}
&make_dictionary_ruleset_code;
}
# Run dynamic_make_dictionary_ruleset by name.  A failure in it is reported
# as a warning rather than stopping the program.
sub make_dictionary_ruleset {
    eval "&dynamic_make_dictionary_ruleset;";
    return unless $@;
    warn $@;
}
# Read the next input line from <> and normalise it.
# Returns the line, or "" at end of input.
#   - without $mac: a carriage return before the line end is removed,
#     together with any blanks or tabs in front of it
#   - with $mac: trailing blanks and tabs are removed
#   - tabs are expanded to spaces
#   - $trim, when set, is applied: a number removes that many leading
#     characters, anything else is a regexp whose matches are deleted
sub getline
{
    my $line = <>;
    $line = "" unless defined($line);

    if (!$mac) {    # Chop trailing whitespace and DOS CRs
        $line =~ s/[ \011]*\015$//;
    }
    else {
        $line =~ s/[ \011]*$//;
    }

    $line = &untabify($line);    # Change all tabs to spaces

    # An empty pattern makes Perl reuse the last successfully matched
    # regexp, so nothing must be substituted while $trim is unset.
    if (defined($trim) && $trim ne "") {
        if ($trim =~ /^\d+$/) {
            $line =~ s/^.{$trim}//;    # Trim lines if requested by --trim
        }
        else {
            $line =~ s/$trim//g;       # Assume $trim is a regexp
        }
    }
    return $line;
}
# Program entry point.
#   1. Process the options and, if requested, load the rulesets.
#   2. Unless $extract is set, print the LaTeX preamble and the title.
#   3. Convert the input one line at a time.  Three lines are held at once:
#      $prev is printed on each pass, $line is the one being worked on and
#      $nextline provides look-ahead.
#   4. Close open lists and verbatim text and end the document.
sub main
{
    # The former "$* = 1;" (multiline searches) has been removed: that
    # variable has had no effect since Perl 5.10 and assigning to it is a
    # fatal error from Perl 5.30 on.
    push(@ruleset_dictionaries,($default_ruleset_dict))
        if ($make_ruleset && (-f $default_ruleset_dict));
    &deal_with_options;
    if ($make_ruleset) {
        push(@ruleset_dictionaries,($system_ruleset_dict))
            if -f $system_ruleset_dict;
        &load_dictionary_ruleset;
    }
    $non_header_label = 0;

    # Moved this way up here so we can grab the first line and use it
    # as the title (if --titlefirst is set)
    $mode = 0;
    $listnum = 0;
    $list_indent = "";
    $line_action = $NONE;
    $prev_action = $NONE;
    $prev_line_length = 0;
    $prev_indent = 0;
    $prev = "";
    $line = &getline;
    $nextline = 0;
    $nextline = &getline if $line;

    # Skip leading blank lines
    while (&is_blank($line) && $line) {
        $prev = $line;
        $line = $nextline;
        $nextline = &getline if $nextline;
    }

    if (!$extract) {
        print "$doctype\n\n" unless !$doctype;
        print "$usepackages\n" if $usepackages;
        print "\\typeout\{Using LaTeX hooks with prefix ``$latexhook''.}\n"
            if $latexhookmode;
        &hook("$latexhook-HeadB", "", "No HeadB")
            if ($latexhookmode & 1);

        # if --titlefirst is set and --title isn't, use the first line
        # as the title.
        if ($titlefirst && !$title) {
            ($title) = $line =~ /^ *(.*)/;    # grab first line
            $title =~ s/ *$//;                # strip trailing whitespace
            $line = "";
        }
        if ($batchmode) {
            print "\\batchmode\n\n";
        }
        if ($nopagenumbers) {
            print "\\pagestyle\{empty\}\n\n";
        }

        # Setting the margins as below will give 1in margins for
        # any paper size.
        if ($smallmargins & 1) {    # x margins
            print "\\setlength\{\\oddsidemargin\}\{0in\}\n";
            print "\\setlength\{\\textwidth\}\{\\paperwidth\}\n";
            print "\\addtolength\{\\textwidth\}\{-2in\}\n";
            print "\\setlength\{\\marginparwidth\}\{.7in\}\n\n";
        }
        if ($smallmargins & 2) {    # y margins
            print "\\setlength\{\\headheight\}\{0in\}\n";
            print "\\setlength\{\\headsep\}\{0in\}\n";
            print "\\setlength\{\\textheight\}\{\\paperheight\}\n";
            print "\\addtolength\{\\textheight\}\{-2in\}\n";
            print "\\setlength\{\\topmargin\}\{0in\}\n\n";
        }
        if ($samepageverbatim || ($escapemode & $ESCVERT)) {
            print "\\let\\olddospecials=\\dospecials\n";
            print "\\def\\dospecials\{$samepageverbatim";
            print "\\catcode\`\|=11 "
                if ($escapemode & $ESCVERT);
            print "\\olddospecials\}\n\n";
        }

        # Characters selected in $escapemode are made active and defined
        # to print themselves, or are given category code 11.
        print "\\let\\leftangle=<\n" if ($escapemode & $ESCANGLES);
        print "\\let\\rightangle=>\n" if ($escapemode & $ESCANGLES);
        print "\\let\\doublequote=\"\n" if ($escapemode & $ESCDQUOTE);
        print "\\let\\dollar=\\\$\n" if ($escapemode & $ESCDOLLARS);
        print "\\catcode\`\\\_=\\active\n" if ($escapemode & $ESCSCRIPTS);
        print "\\catcode\`\\<=\\active\n" if ($escapemode & $ESCANGLES);
        print "\\catcode\`\\\>=\\active\n" if ($escapemode & $ESCANGLES);
        print "\\catcode\`\\\|=\\active\n" if ($escapemode & $ESCVERT);
        print "\\catcode\`\\\"=\\active\n" if ($escapemode & $ESCDQUOTE);
        print "\\catcode\`\\\$=\\active\n" if ($escapemode & $ESCDOLLARS);
        # We use mboxes to avoid errors if equations are added to the
        # source, and it works in old versions of LaTeX too.
        print "\\def<\{\\mbox\{\\\(\\leftangle\\\)\}\}\n"
            if ($escapemode & $ESCANGLES);
        print "\\def>\{\\mbox\{\\\(\\rightangle\\\)\}\}\n"
            if ($escapemode & $ESCANGLES);
        print "\\def\|\{\\mbox\{\\\(\\mid\\\)\}\}\n"
            if ($escapemode & $ESCVERT);
        print "\\def\"\{\\mbox\{\\texttt\{\\doublequote\}\}\}\n"
            if ($escapemode & $ESCDQUOTE);
        print "\\def\$\{\\dollar}\n"
            if ($escapemode & $ESCDOLLARS);
        print "\\catcode\`\\\%=11\n" if ($escapemode & $ESCPERCENT);
        print "\\catcode\`\\\#=11\n" if ($escapemode & $ESCNUM);
        print "\\catcode\`\\\&=11\n" if ($escapemode & $ESCAND);
        print "\\catcode\`\\\^=11\n" if ($escapemode & $ESCSCRIPTS);
        print "\\catcode\`\\\~=11\n" if ($escapemode & $ESCTILDA);
        print "\n"
            if $escapemode & ($ESCPERCENT|$ESCNUM|$ESCAND|$ESCSCRIPTS
                              |$ESCTILDA);
        print "\\setlength\{\\parindent\}\{0em\}\n";
        print "\\setlength\{\\parskip\}\{2ex\}\n\n";
        &hook("$latexhook-HeadE", "", "No HeadE")
            if ($latexhookmode & 2);
        print "\\begin\{document\}\n\n";
        if ($title) {
            $title = &tagit($titletag, $title);
            print "$title\\vskip 4ex\n\n";
        }
    }
    &hook("$latexhook-BodyB", "", "No BodyB")
        if ($latexhookmode & 4);
    print "\\sloppy\n\n" if $sloppy;
    unless ($extract) {
        # A note on how the file was made, inside \iffalse ... \fi.
        print "\\iffalse\n";
        print "Converted from plain text with txt2tex $version\n";
        $mytime = gmtime(time);
        $mytime =~ s/\s+/ /g;
        $mytime =~ s/\:\d\d / /;
        print "on $mytime GMT.\n\n".'$Id: txt2tex.pl 4051 2007-07-01 07:38:34Z kalvis $'."\n\\fi\n";
    }

    $clearlines = 1;
    do {
        if (!$ruleset_only) {
            $line_length = length($line);    # Do this before tags are added
            $line_indent = &count_indent($line, $prev_indent);
            &endpreformat
                if (($mode & $VERB) && ($preformat_trigger_lines != 0));
            &preformat
                if (!($line_action & ($HEADER | $LIST | $MAILHEADER))
                    && !($mode & ($VERB | $LIST))
                    && !($prev_action & $MAILHEADER)
                    && ($endpreformat_trigger_lines != 0));
            if (!($mode & $VERB)) {
                &escape if ($escapemode & $ESCBSLASH);
                &unhyphenate
                    if ($unhyphenation && ($line =~ /[^\W\d_]\-$/) #end hyphen
                        && ($nextline =~ /^\s*[^\W\d_]/) # starts with letters
                        && !($mode & ($HEADER | $MAILHEADER | $BREAK)));
                &mailstuff if ($mailmode && !($line_action & $HEADER));
                &hrule;
                &custom_heading if (($#custom_heading_regexp > -1)
                                    && !($line_action & $HRULE));
                &heading if (!$explicitheadings
                             && !($line_action & ($HRULE | $HEADER))
                             && $nextline =~ /^\s*[=\-\*\.~\+\^]+\s*$/);
                # Maybe we should add $MAILHEADER to the list below.
                &liststuff
                    if (!&is_blank($line)
                        && !($line_action & ($HRULE | $HEADER)));
                # Give preformat another chance now we have tried lists
                # You can remove the following block with little loss.
                &preformat
                    if (($line_action & $END)
                        && !($line_action & ($HEADER | $LIST | $MAILHEADER
                                             | $HRULE))
                        && !($mode & $LIST) && !($prev_action & $MAILHEADER)
                        && ($endpreformat_trigger_lines != 0));
            }
            if (!($mode & $VERB)) {
                &paragraph unless ($prev_action & $BREAK);
                &shortline unless (($mode & $CAPS)
                                   || ($line_action & $PAR));
                &caps if $caps_tag;
                &solo if ($solo_tag && !($mode & $LIST)
                          && !($line_action & ($HEADER|$MAILHEADER|$HRULE)));
            }
        }
        if (!($mode & $VERB)) {
            # Apply the ruleset whenever the table holds at least one
            # rule.  (The bare $#ruleset_table_order used before was false
            # for exactly one rule and true for an empty table.)
            &make_dictionary_ruleset
                if ($make_ruleset && ($#ruleset_table_order > -1));
            &tidy;
            # All the matching and formatting is done. Now we can
            # replace non-ASCII characters with character entities.
            @chars = split(//,$line);
            foreach $_ (@chars) {
                $_ = $char_entities{$_} if defined($char_entities{$_});
            }
            $line = join("", @chars);
        }

        # Print it out and move on, but avoid printing too much white space
        # to the LaTeX file.
        if ($prev !~ /\A\s*\Z/) {
            print "\n" if $clearlines ;
            print $prev unless ($prev_action & $MAILHEADER)
                && ($mailmode & $MAILBODY);
            if (($line_action & ($SOLO | $CAPS | $HEADER | $HRULE
                                 | $PAR | $END))
                || ($prev_action & ($HEADER | $SOLO | $CAPS | $HRULE)))
            # The last item ensures a blank line after such events.
            # With current settings, we do not need ``$SOLO'' here,
            # but I often forget to put it in for non-standard runs.
            {
                $clearlines = 1;
            }
            else {
                $clearlines = 0;
            }
        }
        else {
            $clearlines++;
        }

        # The line's actions are carried over as "previous" only when the
        # next line is not blank.
        if (!&is_blank($nextline)) {
            $prev_action = $line_action;
            $line_action = $NONE;
            $prev_line_length = $line_length;
            $prev_indent = $line_indent;
        }
        $prev = $line;
        $line = $nextline;
        $nextline = &getline if $nextline;
    }
    until (!$nextline && !$line && !$prev);

    $prev = "";
    &endlist($listnum) if ($mode & $LIST);    # End all lists
    print $prev;
    print "\n" unless ($mode & $VERB);
    print "\\end\{verbatim\}\n\n" if ($mode & $VERB);
    &hook("$latexhook-BodyE", "", "No BodyE")
        if ($latexhookmode & 8);
    print "\\end\{document\}\n" if (!$extract);
}
# Run the program; everything below __END__ is data and help text.
main();
__END__
#h
#h Option sets
#h ~~~~~~~~~~~
#h
#h Below the ``__END__'' in txt2tex you can put lists of command line
#h options after a ``keyword''; these can then be loaded by putting
#h ``-sw keyword'' on the command line. Note that ``\'' is a continuation
#h character, so long options can be put on several lines. These include:
#h
#h * remind --- turns the output of the unix remind program into nice LaTeX;
#h call remind using ``rem -n |sort''.
remind -ec 1919 -sm -nv -s 100 -t Appointments -np -tt section*{\today:~~ \
-de \d\d\d\d.(\d\d.\d\d(\s*\d\d.\d\d)?)
#h * num --- picks out simple numbered headings.
num -H ^\s*\d+\.\s -H ^\s*\d+\.\d+\.\s -H ^\s*\d+\.\d+\.\d+\.\s -ns
#h * n --- a variant of the above.
n -H ^\s*\d+\.\s -H ^\s*\d+\.\d+\.\s -H ^\s*\d+\.\d+\.\d+\.\s -TH (\d+\.)+\s+
ind -H ^\s{2}[^\d\s\*] -H ^\s{4}[^\d\s\*] -H ^\s{6}[^\d\s\*]
#h * plain --- a very plain style, which is good for university work!
plain +rs -ec -ns -np -ct off -st off
#h * trim --- removes leading spaces before txt2tex processes the line.
trim -tr ^[^\S\n]*
#h * lynx --- for lynx browser output.
lynx +rs -sm 1 -tr \[.*?\]-? -ec -ss 1ex -H ^\w
ltc +rs -sm -tr \[.*?\]-? -ec -ss 1ex -H ^\w -tc -11 -ns -tt section* -nv
lyn +rs -sm 1 -tr \[.*?\]-? -ec -ss 1ex
#h * noL --- normally \014 produces a LaTeX ``clearpage'', but this option
#h removes \014 before txt2tex sees the line.
noL -tr \014
#h * HH --- this is what I use to print the ``Happy Hacker'' newsletter.
HH -ec -r off -ns -s 60 +rs
#h * man --- useful for dealing with unix man pages, but could be better!
man -tr .\010 -bm -ec +rs -pb 3 -pe 3 -p 20,6,2 -11pt -sm -sv -pi 10
grof -tr .\010
#h * pagesec --- each new section starts on a new page.
pagesec -HT clearpage\section,subsection,subsubsection,paragraph,subparagraph
#h * pagesubsec --- each subsection starts on a new page.
pagesubsec -HT clearpage\section,clearpage\subsection,subsubsection,paragraph
#h * slides --- turns plain text into (very) simple slides. You might also
#h want to set ``noverbatim''. Note that many of the standard options will
#h not work with switch ``slides'' set.
slides -ec -sloppy +rs -dt \documentclass{slides} -ct textbf -st textbf \
-HT clearpage\textbf{\centerline{\Large,textbf{\Large,textbf{\large,textbf \
-up times
#h * handout --- used for student handouts.
handout -ec 0 +rs -sm -ns -HT subsection,subsubsection,subsubsubsection \
-ct subsection* -st subsection*
#h * letter --- used for writing letters, but you need to define your own
#h letter-hook files with your address etc.
letter -d -ec -dt \documentclass[12pt]{letter} -lh letter -ct textbf -st off \
-HT textbf{\large,textbf{\emph +rs -rs letter -lh 9
#h * preview --- not for LaTeXing, but marks up the file in a manner to show
#h you what txt2tex was thinking; this can help in choosing the right tags
#h etc. for the print run. It can be followed by other options, so you can
#h see how that changes the mark up. It is also useful for debugging, but that
#h is probably my job [:-)]
preview -e -ec -ct Caps -st Solo -HT HeadOne,HeadTwo,HeadThree,HeadFour \
-ss 1ShortLine -t Title -tt TitleTag
#h * dict --- turns a list of the form `word: text' into a LaTeX description
#h environment.
dict -de (\w+?\:)
#h * phone --- turns a list of the form `phrase: text' into a LaTeX description
#h environment. I use this for a personal phone book.
phone -de (.*\:) +rs -11 -sm
# ``word space word'' or ``word'' with more than two letters.
di -de (((\w+)\s*){2}\:)
#h * fn --- turns fancy numbered lists, with numbers like 1.1.1, into LaTeX
#h description environments. Often useful for printing contracts off the net!
fn -pi 10 -de ((\d+\.)+(\d+)?) +rs
#h * lpr --- used as part of a fancy plain text printer filter.
lpr -d -ec -sm +rs -nv -ns -np -r off
lpr2 -d -ec -sm +rs -nv -ns -np -r off -tc
#h * lpn --- used by the Lockpicker Network.
lpn -tf -st off -de \*\s+(\w+)\s+\-\-\- -d -ec -sm +rs -nv -ns -r off -11
#h * netrc --- used to print a .netrc file.
netrc -d -sm -de machine(.*)login
# Used by the ``--demo'' option.
demo -bm -ns -ec -tc -10 -de ^\s\*\s(\S+) +rs -sm -sv
AB -de \*.* -t Addresses -sm -s 1000 -tc -nv -d
# Used to print a single simple page
one +rs -nv -ns -np -r off
# Use for my weather reports
laiks -pb 1 -tf -np
#===========================================================================
#H
#H A sample ruleset
#H ~~~~~~~~~~~~~~~~
#H
#H Txt2tex by default tries to load a file called ``.txt2tex-ruleset'' from
#H your home directory (assuming you are using a unix system). This file, if
#H it exists, contains transformation rules that are executed AFTER all other
#H txt2tex subroutines with the exception of ``tidy'' (which does a little
#H cleaning up) and the escaping of ``funny'' characters. Strange behaviour
#H can result from not keeping the time of execution in mind.
#h
#h I most often use ``rulesets'' for writing my own documents in plain text, to
#h be transformed later by txt2tex into LaTeX. So let us look at rules
#h that help in such tasks. Each rule must be on a single line in the ruleset
#h file.
#H
#H /<<(.*?)>>/ -f-> $1
#h
#h The ``-f->'' type rule, when the regular expression on the left matches,
#h takes the expression on the right and turns it into a footnote, then
#h removes the triggering text. So the above example transforms
#h ``Kalvis M. Jansons<<Mathematics, UCL>>'' into
#h ``Kalvis M. Jansons\footnote{Mathematics, UCL}'' in the LaTeX output.
#H
#H Kalvis M. Jansons -Fo-> Email: kalvis\@jansons.org
#h
#h The ``-F->'' type rules are the same as the ``-f->'' ones, but do not
#h remove the triggering text. So the above rule adds a footnote with my email
#h address to my name. So that this happens once only per document, I have
#h added the ``o'' (for once) in the rule.
#H
#H /txt2tex/ -oi-> TXT2TeX \\emph{(written by Kalvis)}
#H
#H /pheonix/ ---> phoenix
#h
#h The above rules are simple transformations, the first is case insensitive,
#h hence the ``i'', and is executed once only. The second corrects a common
#h spelling error (every time it occurs).
#H
#H /tagad/ -ie-> my $time = localtime(time); $time =~ s/\:\d\d\s.*//; $time
#h
#h The ``e'' option means evaluate the righthand side as a perl expression.
#h So the above expression turns ``tagad'' (the Latvian for ``now'') into the
#h current date and time (and removes ``tagad''). The ``e'' option can also
#h be used to change the value of txt2tex parameters while running, by setting
#h them when certain patterns are first encountered.
#H
#H /\*([a-z][a-z ]*[a-z])\*/ -ti-> emph
#H
#H /\*([a-z])\*/ -ti-> emph
#h
#h The ``t'' option is used to tag the text in (), so leads to a shorter
#h rule than could be obtained using the above rules to do this job.
#h The above rules put any sequence of letters and spaces which are between
#h two stars in the LaTeX ``emph'' style. This use of ``*'' is often seen
#h in plain text ``readme'' files.
#H
#H /<\*(.*?)\*>/ -tfi-> textbf
#H
#H Putting a few bits together, we can turn anything in <* ... *> into a
#H ``textbf'' footnote, but I am sure you can think of a better application.
#h
#h Saving the sample ruleset
#h .........................
#h
#h If you want to save this sample ruleset to adapt for your own use, type
#h ``txt2tex -sampleruleset > ~/.txt2tex-ruleset'',
#h
#h or direct it into a different file if you do not want it to be the default.
#h
#h Getting help
#h ~~~~~~~~~~~~
#h
#h Please contact me (Kalvis) with any problems or suggestions.
#h
#h Bugs
#h ~~~~
#h
#h Send any bug reports to me, and I will do my best to fix them, but note that
#h there is a limit to what txt2tex can be expected to do on poorly formatted
#h text files. For such files, it is often better to fix the worst features
#h before giving them to txt2tex; then there should not be the need to do much
#h work, if any, on the LaTeX file produced.
#h
#h Ensure that you are using the latest version, which can be obtained from
#h any CTAN site.
#h
#h Kalvis@Jansons.org
# $Id: txt2tex.pl 4051 2007-07-01 07:38:34Z kalvis $
# ``Kalvis'' is the name of a Baltic God; a ``magical'' blacksmith who created
# the Sun and placed Her in the heavens.