@q Copyright 2012-2024, Alexander Shibakov@> @q This file is part of SPLinT@> @q SPLinT is free software: you can redistribute it and/or modify@> @q it under the terms of the GNU General Public License as published by@> @q the Free Software Foundation, either version 3 of the License, or@> @q (at your option) any later version.@> @q SPLinT is distributed in the hope that it will be useful,@> @q but WITHOUT ANY WARRANTY; without even the implied warranty of@> @q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the@> @q GNU General Public License for more details.@> @q You should have received a copy of the GNU General Public License@> @q along with SPLinT. If not, see <https://www.gnu.org/licenses/>.@> @** The name parser. What follows is an example parser for the term name processing. This approach (i.e. using a `full blown' parser/scanner combination) is probably not the best way to implement such machinery but its main purpose is to demonstrate a way to create a separate parser for local purposes. The name parser is what allows one to automatically typeset term names such as \.{example1} and \.{\%option\_name} as \prodstyle{example1} and \prodstyle{\%option_name}. @q The reference to \prodstyle{example1} above serves a dual role of correcting@> @q an overfull \vbox in the index.@> % We include the macros here since this file is intended to be % included by the documentation `aggregator' so putting bare \TeX\ % at the beginning of the file runs the risk of producing an error % of having \TeX\ material inside a \Cee\ section. \let\currentparsernamespace\parsernamespace \let\parsernamespace\smallnamespace \let\hostparsernamespace\smallnamespace \input stokenset.sty \let\parsernamespace\currentparsernamespace @(small_parser.yy@>= @G Switch to generic mode. 
%{@> @ @=%} @> @ @= %union {@> @ @=} %{@> @ @=%} @> @ @= %% @> @ @= %% @g @ Parser options: generate the table of token names, enable tracing, and use \prodstyle{full_name} as the start symbol. @= @G %token-table %debug %start full_name @g @ Token declarations. The five qualifiers (\prodstyle{OPTIONAL}, \prodstyle{NO_ATTR}, \prodstyle{EXTENDED}, \prodstyle{LT}, and \prodstyle{RT}) are tokens of their own. @= @G %token PERCENT_IDENTIFIER %token IDENTIFIER %token OPTIONAL NO_ATTR EXTENDED LT RT %token INTEGER %token WILDCARD C_ESCCHAR %token META_IDENTIFIER @g @ The grammar. A full name is an identifier string or a quoted name, optionally followed by suffixes that start with a dot, or else a single meta identifier. @= @G full_name: identifier_string suffixes.opt {@> @ @=} | META_IDENTIFIER {@> @ @=} | quoted_name suffixes.opt {@> @ @=} ; identifier_string: PERCENT_IDENTIFIER {@> @ @=} | IDENTIFIER {@> @ @=} | '<' IDENTIFIER '>' {@> @ @=} | '\'' WILDCARD '\'' {@> @ @=} | '\'' C_ESCCHAR '\'' {@> @ @=} | '\'' '>' '\'' {@> @'} string@> @=} | '\'' '<' '\'' {@> @ @=} | '\'' '.' '\'' {@> @ @=} | '\'' '_' '\'' {@> @ @=} | '\'' '-' '\'' {@> @ @=} | '\'' '$' '\'' {@> @ @=} | '$' {@> @ @=} | qualifier {@> @ @=} | identifier_string IDENTIFIER {@> @ @=} | identifier_string qualifier {@> @ @=} | identifier_string INTEGER {@> @ @=} ; quoted_name: '\"' PERCENT_IDENTIFIER '\"' {@> @ @=} | '\"' IDENTIFIER '\"' {@> @ @=} ; suffixes.opt: {@> TeX_( "/yy0{}" ); @=} | '.' {@> TeX_( "/yy0{/nx/dotsp/nx/sfxnone}" ); @=} | '.' suffixes {@> @ @=} | '.' qualified_suffixes {@> @ @=} ; suffixes: IDENTIFIER {@> @ @=} | INTEGER {@> @ @=} | suffixes '.' {@> @ @=} | suffixes IDENTIFIER {@> @ @=} | suffixes INTEGER {@> @ @=} | qualifier '.' {@> TeX_( "/yy0{/nx/sfxn/the/yy(1)/nx/dotsp}" ); @=} | suffixes qualifier '.' 
{@> TeX_( "/yy0{/the/yy(1)/nx/sfxn/the/yy(2)/nx/dotsp}" ); @=} ; qualified_suffixes: suffixes qualifier {@> @ @=} | qualifier {@> @ @=} ; @t}\vb{\inline\flatten}{@> qualifier: OPTIONAL {@> TeX_( "/yy0{/the/yy(1)}" ); @=} | NO_ATTR {@> TeX_( "/yy0{/the/yy(1)}" ); @=} | EXTENDED {@> TeX_( "/yy0{/the/yy(1)}" ); @=} | LT {@> TeX_( "/yy0{/the/yy(1)}" ); @=} | RT {@> TeX_( "/yy0{/the/yy(1)}" ); @=} ; @g @ The value of the rule is the value of its first term immediately followed by the value of its second term. @= @[TeX_( "/yy0{/the/yy(1)/the/yy(2)}/namechars/yyval" );@]@; @ The first two components of the value of the first term become the two arguments of \.{\\idstr}. @= @[TeX_( "/getfirst{/yy(1)}/to/toksa" );@]@; @[TeX_( "/getsecond{/yy(1)}/to/toksb" );@]@; @[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}}/namechars/yyval" );@]@; @ Here the same two components become the arguments of \.{\\optstr}. @= @[TeX_( "/getfirst{/yy(1)}/to/toksa" );@]@; @[TeX_( "/getsecond{/yy(1)}/to/toksb" );@]@; @[TeX_( "/yy0{/nx/optstr{/the/toksa}{/the/toksb}}" );@]@; @ @= @[TeX_( "/getfirst{/yy(1)}/to/toksa" );@]@; @[TeX_( "/getsecond{/yy(1)}/to/toksb" );@]@; @[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}}" );@]@; @ Tags are recognized as a separate syntax element although no special processing is performed by the name parser or the associated macros. 
@= @[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@; @[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@; @[TeX_( "/yy0{/nx/idstr{}{}}" );@]@; @ The next several sections all produce a \.{\\chstr} item followed by a \.{\\visflag} marker; they differ only in the character text placed inside \.{\\chstr}. @= @[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@; @[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@; @[TeX_( "/sansfirst/toksb" );@]@; @[TeX_( "/yy0{/nx/chstr{/the/toksb}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@; @[TeX_( "/yy0{/nx/chstr{/the/toksb}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/yy0{/nx/chstr{<}{<}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @'} string@>= @[TeX_( "/yy0{/nx/chstr{/greaterthan}{/greaterthan}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/yy0{/nx/chstr{/uscoreletter}{/uscoreletter}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/yy0{/nx/chstr{-}{-}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/yy0{/nx/chstr{/safemath}{/safemath}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/yy0{/nx/chstr{.}{.}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/yy0{/nx/bidstr{/nx/$}{/safemath}}" );@]@; @ @= @@; @ The arguments of the resulting \.{\\idstr} are assembled from the second and third components of the first term, each followed by \.{\\space} and then by the first (respectively, second) component of the second term (assuming that \.{\\appendr} and \.{\\concat} append to their first argument). @= @[TeX_( "/getsecond{/yy(1)}/to/toksa" );@]@; @[TeX_( "/appendr/toksa{/space}" );@]@; @[TeX_( "/getfirst{/yy(2)}/to/toksb" );@]@; @[TeX_( "/concat/toksa/toksb" );@]@; @[TeX_( "/getthird{/yy(1)}/to/toksb" );@]@; @[TeX_( "/appendr/toksb{/space}" );@]@; @[TeX_( "/getsecond{/yy(2)}/to/toksc" );@]@; @[TeX_( "/concat/toksb/toksc" );@]@; @[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}}" );@]@; @ @= @ @ An integer at the end of an identifier (such as |id1|) is interpreted as a suffix (similar to the way \MF\ treats identifiers, and \mft\ typesets them,\footnote{This allows, for example, names like |$[term0]| while leaving |$[char2int]| in its `natural' form.} as \prodstyle{id1}) to mitigate a well-intentioned but surprisingly inconvenient feature of \CTANGLE, namely outputting something like \.{id.1} as \.{id\ .1} in an attempt to make sure that integers do not interfere with structure dereferences. 
For this to produce meaningful results, a stricter interpretation of \prodstyle{IDENTIFIER} syntax is required, represented by the \flexrenstyle{id\_strict} syntax \locallink{id_strict definition}below\endlink. @= @[TeX_( "/yy0{/the/yy(1)/nx/dotsp/nx/sfxi/the/yy(2)}" );@]@; @ The second term supplies the two arguments of \.{\\idstr}; a \.{\\visflag} marker follows. @= @[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@; @[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@; @[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@; @ The same, with \.{\\optstr} in place of \.{\\idstr}. @= @[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@; @[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@; @[TeX_( "/yy0{/nx/optstr{/the/toksa}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@; @ @= @[TeX_( "/yy0{/nx/dotsp/the/yy(2)}" );@]@; @ @= @@; @ The remaining action sections build the suffix list by placing \.{\\sfxn}, \.{\\sfxi}, \.{\\dotsp}, or \.{\\qual} markers in front of, after, or between the values of the terms. @= @[TeX_( "/yy0{/nx/sfxn/the/yy(1)}" );@]@; @ @= @[TeX_( "/yy0{/nx/sfxi/the/yy(1)}" );@]@; @ @= @[TeX_( "/yy0{/the/yy(1)/nx/dotsp}" );@]@; @ @= @[TeX_( "/yy0{/the/yy(1)/nx/sfxi/the/yy(2)}" );@]@; @ @= @[TeX_( "/yy0{/the/yy(1)/nx/sfxn/the/yy(2)}" );@]@; @ @= @[TeX_( "/yy0{/the/yy(1)/nx/qual/the/yy(2)}" );@]@; @ @= @[TeX_( "/yy0{/nx/qual/the/yy(1)}" );@]@; @ \Cee\ preamble. In this case, there are no `real' actions that our grammar performs, only \TeX\ output, so this section is empty. @= @ \Cee\ postamble. It is tricky to insert function definitions that use \bison's internal types, as they have to be inserted in a place that is aware of the internal definitions but before said definitions are used. @= @ Union of types. @= @** The name scanner. The scanner for lexing term names is admittedly {\em ad hoc\/} and rather redundant. A minor reason for this is to provide some flexibility for name typesetting. Another reason is to let the existing code serve as a template for similar procedures in other projects. At the same time, it must be pointed out that this scanner is executed multiple times for every \bison\ section, so its efficiency directly affects the speed at which the parser operates. 
@(small_lexer.ll@>= @G @> @ @= %{@> @ @=%} @> @ @= %% @> @ @= %% @O void define_all_states( void ) { @@; } @o @g @ \namedspot{id_strict definition}The tokens consumed by the name parser must represent a relatively fine classification of various identifier substrings to be able to detect various suffixes. In particular, \flexrenstyle{id\_strict} has to end with a letter, which leaves any trailing digits to be matched separately by \flexrenstyle{int}. @= @@; @G(fs1) letter [_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ] c-escchar \\[fnrtv] wc ([^\\\'\"$.<>]{-}[_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]|\\.) id {letter}({letter}|[-0-9])* id_strict {letter}(({letter}|[-0-9])*{letter})? meta_id "*"{id_strict}"*"? int [0-9]+ @g @ No states are registered at the moment: the helper macro is defined and then immediately undefined. @= #define _register_name( name ) @[Define_State( #name, name )@] /* nothing for now */ #undef _register_name @ Strings and characters in directives/rules. @= @G(fs1) %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER @g @ @= #include #include @ The scanner options: a reentrant scanner that uses the \bison\ calling convention, with a start condition stack and debugging output enabled, written to \.{small\_lexer.c}. @= @G(fs1) %option bison-bridge %option noyywrap nounput noinput reentrant %option noyy_top_state %option debug %option stack %option outfile="small_lexer.c" @g @ @= @@; @@; @ White space skipping. @= @G(fs2) [ \f\n\t\v] {@> @[TeX_( "/yylexnext" );@]@=} @g @ This collection of regular expressions might seem redundant, and in its present state, it certainly is. However, if later on the typesetting style for some of the keywords needs to be adjusted, such changes would be easy to implement, since the template is already here. Every keyword is reported as the same \prodstyle{PERCENT_IDENTIFIER} token, and the last pattern accepts any other name that starts with a percent sign. 
@= @G(fs2) "%binary" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%code" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%debug" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%default-prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%define" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%defines" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%destructor" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%dprec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%empty" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%error-verbose" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%expect" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%expect-rr" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%file-prefix" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%fixed-output-files" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%initial-action" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%glr-parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%language" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%left" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%lex-param" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%locations" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%merge" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%name-prefix" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%no-default-prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%no-lines" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%nonassoc" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%nondeterministic-parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%nterm" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%output" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} 
"%param" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%parse-param" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%precedence" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%printer" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%pure-parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%require" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%right" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%skeleton" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%start" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%term" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%token" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%token-table" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%type" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%union" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%verbose" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%yacc" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%default"[-_]"prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%error"[-_]"verbose" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%expect"[-_]"rr" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%fixed"[-_]"output"[-_]"files" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%name"[-_]"prefix" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%no"[-_]"default"[-_]"prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%no"[-_]"lines" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%pure"[-_]"parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%token"[-_]"table" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} "%"({letter}|[0-9]|[-_]|"%"|[<>])+ {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=} 
@t}\vb{\insertraw{\inscomment{\it suffixes}}}{@> "opt" {@> @[TeX_( "/yylexreturnval{OPTIONAL}" );@]@=} "na" {@> @[TeX_( "/yylexreturnval{NO_ATTR}" );@]@=} "ext" {@> @[TeX_( "/yylexreturnval{EXTENDED}" );@]@=} "l" {@> @[TeX_( "/yylexreturnval{LT}" );@]@=} "r" {@> @[TeX_( "/yylexreturnval{RT}" );@]@=} @t}\vb{\insertraw{\inscomment{\it delimiters}}}{@> [<>$._\'\"] {@> @[TeX_( "/yylexreturnchar" );@]@=} {c-escchar} {@> @[TeX_( "/yylexreturnval{C_ESCCHAR}" );@]@=} {wc} {@> @[TeX_( "/yylexreturnval{WILDCARD}" );@]@=} @t}\vb{\insertraw{\inscomment{\it identifiers and other names}}}{@> {id_strict} {@> @[@@]@=} {meta_id} {@> @[@@]@=} {int} {@> @[TeX_( "/yylexreturnval{INTEGER}" );@]@=} @t}\vb{\insertraw{\inscomment{\it everything else}}}{@> . {@> @[@@]@=} @g @ Return an \prodstyle{IDENTIFIER} token. @= @[TeX_( "/yylexreturnval{IDENTIFIER}" );@]@; @ Return a \prodstyle{META_IDENTIFIER} token. @= @[TeX_( "/yylexreturnval{META_IDENTIFIER}" );@]@; @ Report the offending text as a fatal error, but only when \.{\\iftracebadchars} is true; otherwise nothing is done here. @= @[TeX_( "/iftracebadchars" );@]@; @[TeX_( " /yyfatal{invalid character(s): /the/yytext}" );@]@; @[TeX_( "/fi" );@]@;