% Copyright 2012-2024, Alexander Shibakov
% This file is part of SPLinT
%
% SPLinT is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% SPLinT is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with SPLinT.  If not, see <https://www.gnu.org/licenses/>.

%\def\mkpurebyte{\uccode`\@=\yycp@\uppercase{\yybytepure{@}}\uccode`\@=`\@}

% make all symbol characters category 12; most macros are indifferent to this
% however, if delimited macros are used to process the matched text, the option
% makes it easier to write such macros;
%
% \mkpurebyte: store in \yybytepure a single character token whose character
% code is the current value of \yycp@; this is done by temporarily making
% \yycp@ the uppercase code of `.', running \uppercase on `\yybytepure{.}' and
% then restoring the uppercase code of `.' to `.' itself; since \uppercase
% does not change category codes, the stored character keeps the category
% that `.' has when this file is read (12 under the usual category codes)

\def\mkpurebyte{\uccode`\.=\yycp@\uppercase{\yybytepure{.}}\uccode`\.=`\.}

% \yyinput: entry point of the input routine; \next is made equal to the
% upcoming token (without removing it from the input) and \yyinp@t is
% then run to classify that token

\def\yyinput{\futurelet\next\yyinp@t} % get the code of the next character ...
% \yyinp@t: classify the token that \yyinput has stored in \next:
%   - a token with the category of \bgroup is handed to \yyinputgroup;
%   - a token with the character code of a space sets \yycp@ to 32 and \yybyte to { };
%   - a token that matches \eolletter sets \yycp@ to \n and \yybyte to {\n};
%   - anything else is passed on to \yy@np@t;
% \yybreak, \yybreak@, \yybreak@@ and \yycontinue are defined elsewhere;
% presumably each \yybreak variant closes the conditionals that are open at
% its nesting depth and then runs its argument (verify against their definitions)
\def\yyinp@t{% test the code and decide whether to continue lexing the
             % token or return to the parser
    \ifcat\noexpand\next\bgroup
        \yybreak\yyinputgroup
    \else
        \if\noexpand\next\space % code 32 character token is assumed to
            % be an ordinary space (while category 10 tokens are not necessarily);
            % the reason for this choice is the way code 32 characters are treated
            % by \string (turned into `real' spaces); `funny' spaces of both kinds
            % (a character that differs from a `real' space (\catcode`\ ==10
            % \number`\ ==32) in either category or character code) can be
            % created, however, category 10 characters of character code other
            % than 32 are very rare (and take some effort to produce), while
            % category 12, character code 32 tokens are produced as a result of
            % `sanitizing' input all the time; note that the spaces are processed
            % at the lowest level of the input routine and thus cannot be
            % substituted by the switches below; this choice is not bullet proof
            % but should suffice for most uses; in the case of more polluted
            % input, an extra `sanitation' step can be performed first; if
            % desired, category 10 characters can be assumed to be spaces at this
            % point, as well, although this will introduce an extra test, affecting
            % the (already poor) efficiency of the input macros.
            \yycp@=`\ %
            \yybyte={ }%
            \mkpurebyte
            \yybreak@\yyskipspace % return the space
        \else
            \if\noexpand\next\eolletter % cannot use ^^M here since TeX will simply drop the rest of the line
                \yycp@=\n
                \yybyte={\n}%
                \mkpurebyte
                \yybreak@@\yyskipspace % return the end of line character
            \else
                \yybreak@@\yy@np@t
            \fi
        \fi
    \yycontinue
}

% some \Cee\ escape characters; the rest are either silly (like \a and \b) or
% are already defined to have other important functions in \TeX\ (such as \v and \t)

\chardef\n=`\^^J
\chardef\r=`\^^M
\chardef\f=`\^^L
\chardef\HT=`\^^I % ASCII horizontal tab

% values stored in \yycp@ for tokens that are not returned as ordinary characters:
% \charseq (1) is used by \multichardefault, \charac (2) for active characters
% and \chargroup (3) for brace groups
\chardef\charseq1
\chardef\charac2
\chardef\chargroup3

% \yyskipspace: remove the token that follows from the input by assigning it
% to \next (the space after `=' in the definition is the optional space of
% \let, so the token that follows is always the one assigned) and run
% \yyreturn once the assignment is done
\def\yyskipspace{\afterassignment\yyreturn\let\next= }

% \yyinputgroup: #1 is the contents of the brace group that follows (the
% braces are removed by the argument scanner); the group is returned with
% \yycp@ set to \chargroup and \yybyte holding #1 wrapped in braces again
\def\yyinputgroup#1{%
    \yycp@=\chargroup
    \yybyte={{#1}}%
    \mkpurebyte
    \yyreturn
}

% when \ifyyinputdebug is true, the input macros write the tokens read to the terminal and the log
\newif\ifyyinputdebug

% \yy@np@t: #1 is the next token of the input; it is stored in \yybyte and then
% classified using \ischar, which sets \ifchar depending on whether the string
% passed to it (the result of \string#1 followed by padding zeros) ends in
% exactly one `0' after its first two characters:
%   - \string#1 followed by `00' makes \ifchar true when \string#1 is a single character;
%   - \string#1 followed by `0' makes \ifchar true when \string#1 is two characters,
%     i.e. an escape character and a one character name;
% the actions are then selected by \switchon (defined elsewhere) from
% \acharswitch, \onecharswitch or \multicharswitch, with \default set to the
% matching fallback macro
\def\yy@np@t#1{% start lexing:
               % whitespace (category 10) tokens are skipped
               % automatically
    \yybyte{#1}%
    \ifyyinputdebug
        \immediate\write16{read: \the\yybyte\space after: \the\yytext@seen}%
    \fi
    \ifx#1\ % a space token
        \yybreak\returnexplicitspace
    \else
        \expandafter\ischar\string#100\end
        \ifchar % a single character (not a control sequence)
            \ifcat\noexpand#1\noexpand~% is it an active character?
                \yycp@=\charac % yes, return it
                \mkpurebyte
                \yybreak@@\yyreturn
            \else % it is a non-active character ...
                  % ... or a control sequence with an empty name (obtained by \csname\endcsname or
                  % \endlinechar=-1 \toks0={\
                  % } this case can be handled by the switch below but is ignored for the moment)
                \let\default\achardefault
                \yybreak@@{\switchon{\the\yybyte}\in\acharswitch}%
            \fi
        \else % it is a control sequence, return it
            \expandafter\ischar\string#10\end
            \ifchar % it is a one-char control sequence
                \let\default\onechardefault
                \yybreak@@{\switchon{\the\yybyte}\in\onecharswitch}%
            \else
                \let\default\multichardefault
                \yybreak@@{\switchon{\the\yybyte}\in\multicharswitch}%
            \fi
        \fi
    \yycontinue
}

\chardef\explicitspacecode=`\ %

% \returnexplicitspace: return a character with the code \explicitspacecode
% (the code of a space); \yybyte is left as set by the caller
\def\returnexplicitspace{%
    \yycp@=\explicitspacecode
    \mkpurebyte
    \yyreturn % keep looking for a non-whitespace token
}

% \onecharswitch: actions for one character control sequences that need special
% treatment: \n is returned as the character with code \n and \^^M is returned
% as an explicit space; the layout of the entries (\raw ... \raw {...}) is the
% one expected by \switchon, which is defined elsewhere
\def\onecharswitch{
    \raw \n \raw {%
        \yycp@=\n
        \mkpurebyte
        \yyreturn
    }
    \raw \^^M\raw {%
        \returnexplicitspace
    }
}

% \onechardefault: any other one character control sequence is returned as
% the character it names: \yycp@ is set to the value of `<contents of \yybyte>
\def\onechardefault{%
    \expandafter\yycp@\expandafter`\the\yybyte\relax
    \mkpurebyte
    \yyreturn
}%

% \multicharswitch: actions for the control sequences that have a special
% meaning for the input routine; each action is a macro defined elsewhere in
% this file or in the rest of the package, except for the entries that
% return a space (\break, \squashtermstrue), the end of buffer character
% (\yyeof) or re-insert the sequence read preceded by a format command
% (\flatten ... \breakahead)
\def\multicharswitch{
    \raw\vb\raw {%
        \vbunwrap
    }
    \raw\insertraw\raw {%
        \insertrawnext
    }
    \raw\stashed\raw {%
        \stashnext
    }
    \raw\format \formatlocal\raw {%
        \formatnext
    }
    \raw\formatbegin\raw {%
        \fmtbegin
    }
    \raw\formatp\raw {%
        \fmtparam
    }
    \raw\sflush\raw {%
        \sflushnext
    }
    \raw\yyeof\raw {%
        \yycp@=\YYENDOFBUFFERCHAR\relax
        \yybytepure={}%
        \yyreturn
    }
    \raw\inputboundary\raw {%
        \inputboundarynext
    }
    \raw\flatten \resetf \inline \skipheader\raw {%
        \expandafter\yyinput\expandafter\format\expandafter{\the\yybyte}%
    }
    \raw\fold \breakline\raw {%
        \expandafter\yyinput\expandafter\formatlocal\expandafter{\the\yybyte}%
    }
    \raw\breakahead\raw {%
        \expandafter\yyinput\expandafter\formatp\the\yybyte
    }
    \raw\break\raw {% for testing purposes
        \yycp@=`\ %
        \yybytepure={ }%
        \yyreturn
    }
    \raw\squashtermstrue\raw {%
        \yycp@=`\ %
        \yybytepure={ }%
        \squashtermstrue
        \yyreturn
    }
    \raw\endparse \endparseinput\raw {%
        \errmessage{internal error: reading past the end of the input buffer}%
    }
}

% \multichardefault: any other control sequence is returned with \yycp@ set to \charseq
\def\multichardefault{%
    \yycp@\charseq
    \mkpurebyte
    \yyreturn
}%

% the single character switch
% `normalizes' the typesetting of underscores, as this
% is a particularly tricky character to get right; in typewriter fonts there is a
% `first class' underscore character so \_ may be defined as \char`\_ whereas in other fonts
% there is no `native' underscore character so \_ may be defined as a rule of an appropriate
% size
%
% \acharswitch: the switch used by \yy@np@t for single non-active characters;
% the only entry handles the underscore: \yycp@ is set to the character code
% of `_' and \yybyte is replaced by {\_}; the entry syntax is interpreted by
% \switchon, which is defined elsewhere
\def\acharswitch{
    \raw_\raw\_ {% the name parser will replace _ with \_ in subscripts
        \yycp@`\_\relax
        \yybyte{\_}%
        \mkpurebyte
        \yyreturn
    }
}

% \achardefault: any other character is returned as is: \yycp@ is set to the
% value of `<contents of \yybyte>, i.e. the code of the character read
\def\achardefault{%
    \expandafter\yycp@\expandafter`\the\yybyte\relax
    \mkpurebyte
    \yyreturn
}

% the following commands are only here for debugging purposes
% they slow down the input
% NOTE(review): \setspecialcharsfrom is defined elsewhere; it is not clear from
% this file whether the remark above refers to the three lines below -- confirm

\setspecialcharsfrom\onecharswitch
\setspecialcharsfrom\multicharswitch
\setspecialcharsfrom\acharswitch

% care should be taken with using the next command; it will only be executed
% if the parser (rather the lexer) `sees' it as part of the input; thus if this
% command is inserted at the end of the parsed text and, say, a bootstrap parser
% terminates early, the command will not be executed; likewise, it may be executed
% (more than) twice in case, for example, a parser stack is used and both (several)
% parsers see it before the parsing is terminated.
% \insertrawnext: #1 is the token (or group) that follows \insertraw in the
% input; it is executed as is and the input routine is then restarted
\def\insertrawnext#1{% insert a command
    #1\yyinput
}

% \vbunwrap: #1 is the first token or group after the opening \vb and #2 is
% everything that follows it up to the next \vb; the input is rewritten as
% #1\stashed{#2}\vb and scanned again by \yyinput
\def\vbunwrap#1#2\vb{%
    \yyinput#1\stashed{#2}\vb
}

% stash and format streams (lists)

% character codes (that of a space) returned in \yycp@ for stash and format sequences
\chardef\stashchar=`\ %
\chardef\formatchar=`\ %

% running counts of the stash and format sequences seen by the input routine
\newcount\stashmarker
\newcount\formatmarker

% the arguments passed to \showlistcounter, \appendtolist and \appendtolisti
% (all defined elsewhere) to select the stash and the format list
\def\yystash{[stash]}
\def\yyformat{[format]}

% \stashnextwithspace: #1 is the stashed material; a space with the code
% \stashchar is returned to the lexer, {#1} is appended to \yybyte and
% \stashmarker is advanced; #1 is appended to the stash list unless
% \stashmarker is less than the value of \showlistcounter\yystash
\def\stashnextwithspace#1{%
    \yybytepure{ }\yycp@\stashchar
    \yybyte\expandafter{\the\yybyte{#1}}%
    \advance\stashmarker\@ne
    \ifnum\stashmarker<\showlistcounter\yystash\relax
    \else
        \appendtolist\yystash{#1}%
    \fi
    \yyreturn
}

% the mechanism for stash processing making stash invisible
%
% \stashnextwithnothing: same bookkeeping as \stashnextwithspace but nothing
% is returned to the lexer: the updated \yybyte is passed to \concat
% together with \yysubtext (presumably appending it; \concat is defined
% elsewhere) and the input routine continues with the next token
\def\stashnextwithnothing#1{%
    \yybyte\expandafter{\the\yybyte{#1}}\concat\yysubtext\yybyte
    \advance\stashmarker\@ne
    \ifnum\stashmarker<\showlistcounter\yystash\relax
    \else
        \appendtolist\yystash{#1}%
    \fi
    \ifyyinputdebug
        \immediate\write16{mid text: \the\yysubtext}%
    \fi
    \yyinput
}

% collect the stash in the stash list (stream)

\let\stashnext\stashnextwithnothing

% \formatnext: #1 is the format material; a space with the code \formatchar
% is returned to the lexer, #1 is saved in \toksa and \formatmarker is
% advanced; unless \formatmarker is less than the value of
% \showlistcounter\yyformat, the contents of \yybyte (the sequence that was
% read) followed by {#1} are appended to the format list; note that this
% happens before {#1} is appended to \yybyte itself
\def\formatnext#1{%
    \yybytepure{ }\yycp@\formatchar
    \toksa{#1}%
    \advance\formatmarker\@ne
    \ifnum\formatmarker<\showlistcounter\yyformat\relax
    \else
        \expandafter\appendtolisti\expandafter\yyformat\expandafter{\the\yybyte{#1}}% insert the iterator sequence, as well
    \fi
    \yybyte\expandafter{\the\yybyte{#1}}%
    \yyreturn
}

\def\fmtbegin#1\fmtend{\formatnext{#1}} % multiparameter format sequences
\def\fmtparam#1#2{\formatnext{#1{#2}}} % single parameter format sequences

\chardef\boundarychar=`\ %

% the following is a minimal setup of a parsing boundary
%
% \inputboundarynext: a space with the code \boundarychar is returned to the
% lexer and {#1} is appended to \yybyte
\def\inputboundarynext#1{% `l' for left boundary
    \yybytepure{ }\yycp@\boundarychar
    \yybyte\expandafter{\the\yybyte{#1}}%
    \yyreturn % inserting something here will effectively insert it into the input stream
}

% the next macro is not implemented correctly (since after a backup the flushed
% tokens may be reinserted; there needs to be a mechanism to remember that the
% tokens are already in the stash stream)

\def\sflushnext#1#2{% #1 is the marker
                    % #2 is the contents
    \yybytepure{ }\yycp@\stashchar
    % `save the flushed code' goes here
    \yybyte\expandafter{\the\yybyte{#1}{#2}}%
    \concat\yysubtext\yybyte
    \ifyyinputdebug
        \immediate\write16{mid text: \the\yysubtext}%
    \fi
    \yyreturn
}

% the replacement text \ischar compares against
\def\z@rotest{0}

\newif\ifchar

% \ischar: called as \ischar<string><padding zeros>\end; #1 and #2 absorb
% the first two characters and #3 the rest; \ifchar is set to true exactly
% when #3 is the single character `0', so with two padding zeros the test
% succeeds for a one character string and with one padding zero for a
% two character string
\def\ischar#1#2#3\end{% three parameters because #1 can be an
                      % \escapechar
    \def\lastnamechar{#3}%
    \ifx\lastnamechar\z@rotest\chartrue\else\charfalse\fi
}

% trivial input routine

\def\yyinputtrivial{\futurelet\next\yyinp@ttrivial} % get the code of the next character ...

% \yyinp@ttrivial: a category 10 token is returned as a space (it is removed
% from the input by \yyskipspace), anything else is passed to
% \yy@np@ttrivial; the `%' after the opening brace keeps the end of line from
% becoming a space token at the start of the replacement text: such a token
% would be executed on every call and add a spurious space to the output
% whenever the input routine runs in horizontal mode
\def\yyinp@ttrivial{%
    \ifcat\noexpand\next\space % category 10 token
        \yycp@=`\ %
        \yybyte={ }%
        \mkpurebyte
        \yybreak\yyskipspace % return the space
    \else
        \yybreak\yy@np@ttrivial
    \yycontinue
}

% \yy@np@ttrivial: #1 is the next token of the input;
%   - a letter (category 11) is returned as itself;
%   - a token matching \eolletter is returned as the character with code \n;
%   - \yyeof returns \YYENDOFBUFFERCHAR with an empty \yybytepure;
%   - anything else is returned with \yycp@ set to \charseq
\def\yy@np@ttrivial#1{%
    \ifcat\noexpand#1a%
        \yycp@`#1%
        \yybyte{#1}%
        \yybytepure{#1}%
    \else
        \if\noexpand#1\eolletter
            \yycp@=\n
            \yybyte\expandafter{\eolletter}%
            \mkpurebyte
        \else
            \ifx#1\yyeof
                \yycp@=\YYENDOFBUFFERCHAR\relax
                \yybyte{#1}%
                \yybytepure={}%
            \else
                \yycp@\charseq
                \yybyte{#1}% record the token read, as every other branch does;
                           % otherwise \yybyte keeps the value of the previous token
                \mkpurebyte
            \fi
        \fi
    \fi
    \yyreturn
}