@q Copyright 2012-2022, Alexander Shibakov@>
@q This file is part of SPLinT@>
@q SPLinT is free software: you can redistribute it and/or modify@>
@q it under the terms of the GNU General Public License as published by@>
@q the Free Software Foundation, either version 3 of the License, or@>
@q (at your option) any later version.@>
@q SPLinT is distributed in the hope that it will be useful,@>
@q but WITHOUT ANY WARRANTY; without even the implied warranty of@>
@q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the@>
@q GNU General Public License for more details.@>
@q You should have received a copy of the GNU General Public License@>
@q along with SPLinT. If not, see .@>
@*1\eatone{Flex}\flex\ specific routines.
The output of the scanner automaton follows the steps similar to the
ones taken during the parser output. The major difference is in the
output of actions and constants.
@*2 Tables.
As in the case of a parser we start with all the table names.
@=
/* one entry per scanner table; |_register_table_d| itself is not defined
   in this part of the file */
_register_table_d(yy_accept)@;
_register_table_d(yy_ec)@;
_register_table_d(yy_meta)@;
_register_table_d(yy_base)@;
_register_table_d(yy_def)@;
_register_table_d(yy_nxt)@;
_register_table_d(yy_chk)@;
@*2Actions.
The scanner function, |yylex()|, has been reverse engineered to execute
all portions of the action code. The method chosen here makes sure that
none of the tables gets written past its last element.
@=
/* largest entry found in |yy_base|, |yy_accept|, |yy_nxt| and |yy_ec|
   respectively; each starts at 0 and is raised by the loops that scan
   the corresponding table */
int max_yybase_entry = 0;
int max_yyaccept_entry = 0;
int max_yynxt_entry = 0;
int max_yy_ec_entry = 0;
@ The `exotic' scanner constants treated below are the constants used to
control the scanner code itself. Unfortunately they are not given any
names that can be used by the `driver' to output them in a simple way.
@= { int i; for ( i = 0; i < sizeof( yy_base )/sizeof( yy_base[0] ); i++ ) { if ( yy_base[i] > max_yybase_entry ) { max_yybase_entry = yy_base[i]; } } for ( i = 0; i < sizeof( yy_nxt )/sizeof( yy_nxt[0] ); i++ ) { if ( yy_nxt[i] > max_yynxt_entry ) { max_yynxt_entry = yy_nxt[i]; } } for ( i = 0; i < sizeof( yy_accept )/sizeof( yy_accept[0] ); i++ ) { if ( yy_accept[i] > max_yyaccept_entry ) { max_yyaccept_entry = yy_accept[i]; } } for ( i = 0; i < sizeof( yy_ec )/sizeof( yy_ec[0] ); i++ ) { if ( yy_ec[i] > max_yy_ec_entry ) { max_yy_ec_entry = yy_ec[i]; } } } @ @= if ( output_desc.output_actions ) { int i, j; yyscan_t fake_scanner; fprintf( tables_out, "%s", action_desc.preamble ); if ( !bare_actions ) { if ( yylex_init( &fake_scanner ) ) { printf( "Cannot initialize the scanner\n" ); } yy_ec[0] = 0; yy_base[1] = max_yybase_entry; yy_base[2] = 0; yy_chk[0] = 2; yy_chk[max_yybase_entry] = 1; yy_nxt[max_yybase_entry] = 1; yy_nxt[0] = 1; fprintf( stderr, "max entry: %d\n", max_yybase_entry ); } for ( i = 1; i <= max_yyaccept_entry; i++ ) { fprintf( tables_out, action_desc.act_setup, i ); if ( i == YY_END_OF_BUFFER ) { fprintf( tables_out, " %% YY_END_OF_BUFFER\n%s\n", " \\yylexeofaction" ); } else { fprintf( tables_out, "\n" ); if ( !bare_actions ) { (( struct yyguts_t *)fake_scanner)->yy_hold_char = 0; yy_accept[1] = i; if ( i%10 == 0 ) { fprintf( stderr, "." 
); } yylex( NULL, fake_scanner ); } } fprintf( tables_out, action_desc.act_suffix, i ); } fprintf( tables_out, " %% end of file states:\n%s\n", " %#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)" ); if ( max_eof_state == 0 ) { /* in case the user has not declared any states */ max_eof_state = YY_STATE_EOF( INITIAL ); } for ( ; i <= max_eof_state; i++ ) { fprintf( tables_out, action_desc.act_setup, i ); if ( !bare_actions ) { fprintf( tables_out, "\n" ); (( struct yyguts_t *)fake_scanner)->yy_hold_char = 0; yy_accept[1] = i; yylex( NULL, fake_scanner ); } fprintf( tables_out, action_desc.act_suffix, i ); } fprintf( tables_out, "%s", action_desc.postamble ); if ( action_desc.cleanup ) { action_desc.cleanup( &action_desc ); } } @@; @; fprintf( tables_out, "\\constset{YYECMAGIC}{%d}%%\n", yy_ec_magic ); fprintf( tables_out, "\\constset{YYMAXEOFSTATE}{%d}%%\n", max_eof_state ); @ @= BAD_SCANNER,@[@] @ @= int yy_ec_magic; @ The `magic' constants are similar to the `exotic' ones mentioned above except the methods used to compute them rely on reverse engineering the scanner function. Since this changes the scanner tables it has to be done after the `driver' has finished going through all the actions. @= { int i, j; char fake_yytext[ YY_MORE_ADJ + 1 ]; yyscan_t yyscanner; struct yyguts_t *yyg; if ( yylex_init( &yyscanner ) ) { printf( "Cannot initialize the scanner\n" ); exit( BAD_SCANNER ); } yyg = (struct yyguts_t *)yyscanner; yyg->yy_start = 0; yy_set_bol(0); yyg->yytext_ptr = fake_yytext; yyg->yy_c_buf_p = yyg->yytext_ptr + 1 + YY_MORE_ADJ; fake_yytext[YY_MORE_ADJ] = 0; /* |*yy_cp = 0;| */ yy_accept[0] = 0; yy_base[0] = 0; for ( i = 0; i < sizeof( yy_chk )/sizeof( yy_chk[0] ); i++ ) { yy_chk[i] = 0; } for ( i = 0; i < sizeof( yy_nxt )/sizeof( yy_nxt[0] ); i++ ) { yy_nxt[i] = i; } yy_ec_magic = yy_get_previous_state( yyscanner ); } @*2State names. 
There is no easy way to output the symbolic names for states, so this has to be done by hand while the actions are output. The state names are accumulated in a list structure and are printed out after the action output is complete. Note that parsing the scanner file is only partially helpful (even though the extended parser and scanner can recognize the \.{\%x} option). All that can be done is output the state {\it names\/} but not their numerical values, since all such names are macros whose values are only known to the \flex\ generated scanner. @d Define_State( st_name, st_num ) do { struct lexer_state_d *this_state; this_state = malloc( sizeof(struct lexer_state_d) ); this_state->name = st_name; this_state->value = st_num; this_state->next = NULL; if ( last_state ) { last_state->next = this_state; last_state = this_state; } else { last_state = state_list = this_state; } if ( YY_STATE_EOF( st_num ) > max_eof_state ) { max_eof_state = YY_STATE_EOF( st_num ); } } while (0); @= int max_eof_state = 0; struct lexer_state_d { char *name; int value; struct lexer_state_d *next; }; struct lexer_state_d *state_list = NULL; struct lexer_state_d *last_state = NULL; @ @= { struct lexer_state_d *current_state; struct lexer_state_d *next_state; current_state = next_state = state_list; if ( current_state ) { fprintf( tables_out, "\\def\\setflexstates{%%\n" " \\stateset{INITIAL}{%d}%%\n", INITIAL ); while ( current_state ) { fprintf( tables_out, " \\stateset{%s}{%d}%%\n", current_state->name, current_state->value); current_state = current_state->next; free( next_state ); next_state = current_state; /* the |name| field is not deallocated because it is not allocated on the heap */ } fprintf( tables_out, "}%%\n%%\n" ); } } @*2Constants. The few hard coded constants needed for the lexer to work are listed here. @= _register_const_d(YY_END_OF_BUFFER_CHAR)@; _register_const_d(YY_NUM_RULES)@; _register_const_d(YY_END_OF_BUFFER)@; @*2Output modes. 
The output modes are the same as those in the parser driver with some minor changes. @*3Generic output. Generic output is not programmed yet. @= GENERIC_OUT,@[@] @ @= case GENERIC_OUT:@; printf( "This mode is not supported yet\n" ); exit(0); break; @*3\TeX~mode. The \TeX\ mode is the main focus of this software. @= TEX_OUT,@[@] @ @= case TEX_OUT:@; @@; @@; @@; break; @ @= tex_table_generic(yy_accept); yy_accept_desc.name = "yyaccept"; tex_table_generic(yy_ec); yy_ec_desc.name = "yyec"; tex_table_generic(yy_meta); yy_meta_desc.name = "yymeta"; tex_table_generic(yy_base); yy_base_desc.name = "yybase"; tex_table_generic(yy_def); yy_def_desc.name = "yydef"; tex_table_generic(yy_nxt); yy_nxt_desc.name = "yynxt"; tex_table_generic(yy_chk); yy_chk_desc.name = "yychk"; @ @= if ( optimize_actions ) { action_desc.preamble = "%\n% the big switch\n%\n"@/ "\\catcode`\\/=0\\relax\n%\n"@/ "\\def\\yydoactionswitch#1{%%\n"@/ " \\let\\yylextail\\yylexcontinue\n"@/ " \\csname doflexaction\\number #1\\parsernamespace\\endcsname\n"@/ " \\yylextail\n"@; "}\\stashswitch{yydoactionswitch}%\n"; action_desc.act_setup = "\n\\expandafter\\def\\csname doflexaction%d\\parsernamespace\\endcsname{%%"; action_desc.act_suffix = "}%% end of rule %d\n"; action_desc.action1 = NULL; action_desc.actionn = NULL; action_desc.postamble = "\\catcode`\\/=12\\relax\n%\n"; action_desc.print_rule = NULL; action_desc.cleanup = NULL; output_desc.output_actions = 1; } else { action_desc.preamble = "%\n% the big switch\n%\n"@/ "\\catcode`\\/=0\\relax\n%\n"@/ "\\def\\yydoactionswitch#1{%%\n \\let\\yylextail\\yylexcontinue\n"@; " \\ifcase#1\\relax\n"; action_desc.act_setup = " \\or\n" " \\YYRULESETUP %% (rule %d) "; action_desc.act_suffix = " %% end of rule %d\n"; action_desc.action1 = NULL; action_desc.actionn = NULL; action_desc.postamble = " \\else\n \\fi\n \\yylextail\n}\\stashswitch{yydoactionswitch}%\n\\catcode`\\/=12\\relax\n%\n"; action_desc.print_rule = NULL; action_desc.cleanup = NULL; 
output_desc.output_actions = 1; } @ \TeX\ constant output is another place where the techniques described above are applied. A few names are handled separately, because they contain underscores. \def\YYxENDxOFxBUFFERxCHARxdesc{\.{YY\_END\_OF\_BUFFER\_CHAR\_}\\{desc}} \def\YYxNUMxRULESxdesc{\.{YY\_NUM\_RULES\_}\\{desc}} \def\YYxENDxOFxBUFFERxdesc{\.{YY\_END\_OF\_BUFFER\_}\\{desc}} @s YY_END_OF_BUFFER_CHAR_desc TeX @s YY_NUM_RULES_desc TeX @s YY_END_OF_BUFFER_desc TeX @= #define _register_const_d(c_name) @[c_name##_desc.format = "\\constset{%s}{%d}%%\n"; \ c_name##_desc.name = #c_name; \ c_name##_desc.value = c_name; \ output_desc.output_##c_name = 1;@] @@; #undef _register_const_d YY_END_OF_BUFFER_CHAR_desc.name = "YYENDOFBUFFERCHAR"; YY_NUM_RULES_desc.name = "YYNUMRULES"; YY_END_OF_BUFFER_desc.name = "YYENDOFBUFFER"; @ @= fprintf( tables_out, "\\constset{YYMAXREALCHAR}{%ld}%%\n", sizeof( yy_accept )/(sizeof( yy_accept[0] )) - 1 ); fprintf( tables_out, "\\constset{YYBASEMAXENTRY}{%d}%%\n", max_yybase_entry ); fprintf( tables_out, "\\constset{YYNXTMAXENTRY}{%d}%%\n", max_yynxt_entry ); fprintf( tables_out, "\\constset{YYMAXRULENO}{%d}%%\n", max_yyaccept_entry ); fprintf( tables_out, "\\constset{YYECMAXENTRY}{%d}%%\n", max_yy_ec_entry ); @*2 Command line options. We start with the most obvious option, the one begging for help. 
@ @= register_option_("help", no_argument, 0, LONG_HELP, "")@; @ @= @[@[@], 'h'@] @ @= case 'h': /* short help */@; fprintf(stderr, "Usage: %s [options] output_file\n", argv[0]); exit(0); break; /* should not be needed */ case LONG_HELP:@; fprintf(stderr, "%s [--mode=TeX:options] output_file outputs tables\n" " and constants for a TeX scanner\n", argv[0]); exit(0); break; /* should not be needed */ @ @= register_option_("debug", optional_argument, 0, 'b', "")@; register_option_("mode", required_argument, 0, 'm', "")@; register_option_("table-separator", required_argument, 0, 'z', "")@; register_option_("format", required_argument, 0, 'f', "")@; /* name? */ register_option_("table", required_argument, 0, 't', "")@; /* specific table */ register_option_("constant", required_argument, 0, 'c', "")@; /* specific constant */ register_option_("name-length", required_argument, 0, 'l', "")@; /* change |MAX_NAME_LENGTH| */ register_option_("token", required_argument, 0, 'n', "")@; /* specific token */ register_option_("run-scan", required_argument, 0, 'p', "")@; /* run the scanner */ register_option_("scan-file", required_argument, 0, 'i', "")@; /* input for the scanner */ @ A few options can be immediately discussed. @= int debug_level = 0; char *table_separator = "%s "; @ @= case 'b': /* debug (level) */@; debug_level = optarg ? atoi(optarg) : 1; break; case 'm': /* output mode */@; switch( optarg[0] ) { case 'T': case 't':@; mode = TEX_OUT; break; case 'b': case 'B': case 'g': case 'G':@; mode = GENERIC_OUT; break; default:@; break; } break; case 'z': table_separator = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) ); strcpy(table_separator, optarg); break;