/* l2xiscan.c LTX2X Interpreter lexing routines */ /* Written by: Peter Wilson, CUA pwilson@cme.nist.gov */ /* This code is partly based on algorithms presented by Ronald Mak in */ /* "Writing Compilers & Interpreters", John Wiley & Sons, 1991 */ #include #include #include #include #include "l2xicmon.h" #include "l2xierr.h" #include "l2xiscan.h" #include "l2xiidbg.h" #ifndef l2xicpr_h #include "l2xicpr.h" /* before/after token code lists */ #endif #define EOF_CHAR 257 #define TAB_SIZE 8 #define QUOTE_CHAR '\'' #define MIN_RESERVED_WORD_LENGTH 2 #define MAX_RESERVED_WORD_LENGTH 15 /* US_CHAR is for ltx2x, adding undescore character */ #define US_CHAR '_' /* CHARACTER CODES */ typedef enum{ LETTER, DIGIT, QUOTE, SPECIAL, EOF_CODE, USCORE, /* for ltx2x underscore */ } CHAR_CODE; #define EOS '\0' /* RESERVED WORD TABLES */ typedef struct { char *string; TOKEN_CODE token_code; } RW_STRUCT; RW_STRUCT rw_2[] = { {"if", IF}, /* P&E */ {"in", IN}, /* P&E */ {"of", OF}, /* P&E */ {"or", OR}, /* P&E */ {"to", TO}, /* P&E */ {"as", XAS}, {"by", XBY}, {"pi", XPI}, {NULL, 0 }, }; RW_STRUCT rw_3[] = { {"and", AND}, /* P&E */ {"div", DIV}, /* P&E */ {"end", END}, /* P&E */ {"for", FOR}, /* P&E */ {"mod", MOD}, /* P&E */ {"not", NOT}, /* P&E */ {"set", SET}, /* P&E */ {"var", VAR}, /* P&E */ {"bag", XBAG}, {"use", XUSE}, {"xor", XXOR}, {NULL, 0 }, }; RW_STRUCT rw_4[] = { {"case", CASE}, /* P&E */ {"else", ELSE}, /* P&E */ {"file", FFILE}, {"then", THEN}, /* P&E */ {"type", TYPE}, /* P&E */ {"with", WITH}, /* P&E */ {"from", XFROM}, {"list", XLIST}, {"real", XREAL}, {"rule", XRULE}, {"skip", XSKIP}, {"like", XLIKE}, {"self", XSELF}, {"true", XTRUE}, {"call", XCALL}, {NULL, 0 }, }; RW_STRUCT rw_5[] = { {"array", ARRAY}, /* P&E */ {"begin", BEGIN}, /* P&E */ {"until", UNTIL}, /* P&E */ {"while", WHILE}, /* P&E */ {"alias", XALIAS}, {"fixed", XFIXED}, {"local", XLOCAL}, {"model", XMODEL}, {"oneof", XONEOF}, {"query", XQUERY}, {"where", XWHERE}, {"andor", XANDOR}, {"false", XFALSE}, {"notes", XNOTES}, {"subof", XSUBOF}, {"supof", XSUPOF}, {"using", XUSING}, {NULL, 0 }, }; RW_STRUCT rw_6[] = { {"repeat", REPEAT}, /* P&E */ {"binary", XBINARY}, {"derive", XDERIVE}, {"end_if", XEND_IF}, {"entity", XENTITY}, {"escape", XESCAPE}, {"number", XNUMBER}, {"return", XRETURN}, {"schema", XSCHEMA}, {"select", XSELECT}, {"string", XSTRING}, {"unique", XUNIQUE}, {"import", XIMPORT}, {NULL, 0 }, }; RW_STRUCT rw_7[] = { {"boolean", XBOOLEAN}, {"context", XCONTEXT}, {"generic", XGENERIC}, {"integer", XINTEGER}, {"inverse", XINVERSE}, {"logical", XLOGICAL}, {"subtype", XSUBTYPE}, {"const_e", XCONST_E}, {"unknown", XUNKNOWN}, {"purpose", XPURPOSE}, {"the_day", THE_DAY}, {NULL, 0 }, }; RW_STRUCT rw_8[] = { {"end_code", ENDCODE}, /* for ltx2x */ {"function", FUNCTION}, /* P&E */ {"abstract", XABSTRACT}, {"constant", XCONSTANT}, {"end_case", XEND_CASE}, {"end_rule", XEND_RULE}, {"end_type", XEND_TYPE}, {"optional", XOPTIONAL}, {"criteria", XCRITERIA}, {"the_year", THE_YEAR}, {NULL, 0 }, }; RW_STRUCT rw_9[] = { {"procedure", PROCEDURE}, /* P&E */ {"aggregate", XAGGREGATE}, {"end_alias", XEND_ALIAS}, {"end_local", XEND_LOCAL}, {"end_model", XEND_MODEL}, {"otherwise", XOTHERWISE}, {"reference", XREFERENCE}, {"supertype", XSUPERTYPE}, {"end_notes", XEND_NOTES}, {"objective", XOBJECTIVE}, {"parameter", XPARAMETER}, {"test_case", XTEST_CASE}, {"the_month", THE_MONTH}, {NULL, 0 }, }; RW_STRUCT rw_10[] = { {"end_entity", XEND_ENTITY}, {"end_repeat", XEND_REPEAT}, {"end_schema", XEND_SCHEMA}, {"references", XREFERENCES}, {NULL, 0}, }; RW_STRUCT rw_11[] = { {"end_context", XEND_CONTEXT}, {"enumeration", XENUMERATION}, {"end_purpose", XEND_PURPOSE}, {"realization", XREALIZATION}, {"schema_data", XSCHEMA_DATA}, {NULL, 0}, }; RW_STRUCT rw_12[] = { {"end_constant", XEND_CONSTANT}, {"end_function", XEND_FUNCTION}, {"end_criteria", XEND_CRITERIA}, {NULL, 0}, }; RW_STRUCT rw_13[] = { {"end_procedure", XEND_PROCEDURE}, {"end_objective", XEND_OBJECTIVE}, {"end_parameter", XEND_PARAMETER}, {"end_test_case", XEND_TEST_CASE}, {NULL, 0}, }; RW_STRUCT rw_14[] = { {"end_references", XEND_REFERENCES}, {NULL, 0}, }; RW_STRUCT rw_15[] = { {"end_realization", XEND_REALIZATION}, {"end_schema_data", XEND_SCHEMA_DATA}, {NULL, 0}, }; RW_STRUCT *rw_table[] = { NULL, NULL, rw_2, rw_3, rw_4, rw_5, rw_6, rw_7, rw_8, rw_9, rw_10, rw_11, rw_12, rw_13, rw_14, rw_15, }; /* token lists */ /* Tokens that start or follow a statement */ TOKEN_CODE statement_start_list[] = {BEGIN, CASE, IF, REPEAT, IDENTIFIER, XRETURN, XSKIP, XESCAPE, 0}; TOKEN_CODE statement_end_list[] = {SEMICOLON, TO, ENDCODE, END_OF_FILE, 0}; /* Tokens that start a declaration */ TOKEN_CODE declaration_start_list[] = {XENTITY, TYPE, XRULE, XCONSTANT, XLOCAL, PROCEDURE, FUNCTION, 0}; TOKEN_CODE follow_indexes_list[] = {OF, IDENTIFIER, ARRAY, XBAG, XLIST, SET, XBINARY, XBOOLEAN, XINTEGER, XLOGICAL, XNUMBER, XREAL, XSTRING, XGENERIC, SEMICOLON, ENDCODE, END_OF_FILE, 0}; /* Operator tokens */ TOKEN_CODE rel_op_list[] = {LT, LE, EQUAL, NE, GE, GT, COLONEQUALCOLON, COLONNEQCOLON, IN, XLIKE, 0}; TOKEN_CODE add_op_list[] = {PLUS, MINUS, OR, XXOR, 0}; TOKEN_CODE mult_op_list[] = {STAR, SLASH, DIV, MOD, AND, 0}; TOKEN_CODE follow_header_list[] = {SEMICOLON, ENDCODE, END_OF_FILE, 0}; TOKEN_CODE follow_proc_id_list[] = {LPAREN, SEMICOLON, ENDCODE, END_OF_FILE, 0}; TOKEN_CODE follow_func_id_list[] = {LPAREN, COLON, SEMICOLON, END_OF_FILE, 0}; /* Tokens that follow a formal parameter list */ TOKEN_CODE follow_parms_list[] = {RPAREN, SEMICOLON, ENDCODE, END_OF_FILE, 0}; /* Tokens that follow an actual parameter list */ TOKEN_CODE follow_parm_list[] = {COMMA, RPAREN, 0}; /* Tokens that can follow an expression */ TOKEN_CODE follow_expr_list[] = {OF, SEMICOLON, ENDCODE, END_OF_FILE, 0}; /* Tokens that can start or follow a case label */ TOKEN_CODE case_label_start_list[] = {IDENTIFIER, NUMBER_LITERAL, PLUS, MINUS, STRING_LITERAL, XOTHERWISE, 0}; TOKEN_CODE follow_case_label_list[] = {COLON, SEMICOLON, 0}; /* Tokens that follow declarations in SETUP code */ TOKEN_CODE ltx2x_follow_decls_list[] = {SEMICOLON, BEGIN, CASE, IF, REPEAT, IDENTIFIER, ENDCODE, 0}; /* Tokens that follow declarations in procedures/functions */ TOKEN_CODE follow_decls_list[] = {SEMICOLON, BEGIN, CASE, IF, REPEAT, IDENTIFIER, XRETURN, XEND_FUNCTION, XEND_PROCEDURE, ENDCODE, END_OF_FILE, 0}; /* The tokens for simple types */ TOKEN_CODE simple_type_list[] = {XBINARY, XBOOLEAN, XINTEGER, XLOGICAL, XNUMBER, XREAL, XSTRING, XGENERIC, 0}; /* the tokens of constants */ TOKEN_CODE constant_list[] = {QUERY_CHAR, XCONST_E, XFALSE, XPI, XSELF, XTRUE, XUNKNOWN, THE_DAY, THE_MONTH, THE_YEAR, 0}; /* The tokens of aggregates */ TOKEN_CODE aggregation_type_list[] = {ARRAY, SET, XAGGREGATE, XBAG, XLIST, 0}; TOKEN_CODE follow_min_bound_list[] = {COLON, NUMBER_LITERAL, LPAREN, MINUS, PLUS, 0}; /* The tokens that can follow an entity's explicit attributes */ TOKEN_CODE follow_attributes_list[] = {XDERIVE, XUNIQUE, XINVERSE, XWHERE, XEND_ENTITY, 0}; /* GLOBALS */ /* char ch; */ int ch; TOKEN_CODE token; /* code of current token */ LITERAL literal; /* value of a literal */ int buffer_offset; /* char offset into source buffer */ int level = 0; /* current nesting level */ int line_number = 0; /* current source line number */ BOOLEAN print_flag = TRUE; /* TRUE to print source lines */ BOOLEAN block_flag = FALSE; /* TRUE iff parsing a block */ BOOLEAN in_comment = FALSE; /* TRUE iff in an EXPRESS comment */ BOOLEAN in_eol_comment = FALSE; /* TRUE iff in an EXPRESS eol comment */ int num_opencom = 0; /* # of open EXPRESS comment markers */ char source_buffer[MAX_SOURCE_LINE_LENGTH]; /* source file buffer */ char token_string[MAX_TOKEN_STRING_LENGTH]; /* token string */ char word_string[MAX_TOKEN_STRING_LENGTH]; /* single-cased token string */ char *bufferp = source_buffer; /* ptr to source buffer */ char *tokenp = token_string; /* ptr to token string */ int digit_count; /* total # of digits in number */ BOOLEAN count_error; /* TRUE iff too many digits in number */ /* int page_number = 0; current o/p page number */ /* int line_count = MAX_LINES_PER_PAGE; # of lines on current o/p page */ char source_name[MAX_FILE_NAME_LENGTH]; /* name of source file */ char date[DATE_STRING_LENGTH]; /* current date and time */ FILE *source_file; /* CHAR_CODE char_table[256]; ----------- add two to this */ CHAR_CODE char_table[258]; /* ----------- added two to this */ /* array of string representation of token codes */ char *tok_code_strs[] = { #define sctc(a, b, c) b, #include "l2xisctc.h" #undef sctc }; /* MACROS */ /* char_code(ch) return the character code of ch */ #define char_code(ch) char_table[ch] /* FORWARDS */ /* init_tok_code_strs(); */ /***************************************************************************/ /* init_scanner(name) Initialise the scanner globals and open source */ /* file. */ /***************************************************************************/ /* init_scanner(afil) Initialise the scanner globals and set the source */ /* file. */ init_scanner(afil) FILE *afil; /* source file */ { int ich; /* * sprintf(dbuffer,"\n init_scanner called with name = %s\n", name); * debug_print(dbuffer); */ /* initialise character table */ for (ich = 0; ich < 256; ++ich) char_table[ich] = SPECIAL; for (ich = '0'; ich <= '9'; ++ich) char_table[ich] = DIGIT; for (ich = 'A'; ich <= 'Z'; ++ich) char_table[ich] = LETTER; for (ich = 'a'; ich <= 'z'; ++ich) char_table[ich] = LETTER; char_table[QUOTE_CHAR] = QUOTE; char_table[US_CHAR] = USCORE; char_table[EOF_CHAR] = EOF_CODE; source_file = afil; /* get the first character */ bufferp = ""; get_char(); debug_print(" init_scanner: All done\n"); return; } /* end init_scanner */ /***************************************************************************/ /***************************************************************************/ /* quit_scanner() no more scanning */ quit_scanner() { /* close_source_file(); */ } /* end quit_scanner */ /***************************************************************************/ /* CHARACTER ROUTINES */ /***************************************************************************/ /* get_char() set ch to next char from the source buffer */ get_char() { BOOLEAN get_source_line(); /* if current source line exhausted, get next line */ /* if file exhausted, set ch to EOF character and return */ if (*bufferp == EOS) { if (!get_source_line()) { ch = EOF_CHAR; return; } bufferp = source_buffer; buffer_offset = 0; } ch = *bufferp++; /* special character processing */ /* tab --- up buffer offset to next multiple of TAB_SIZE, and replace */ /* ch with a blank */ /* newline -- replace ch with a blank */ /* { (start of comment) -- skip over comment and replace with a blank */ switch (ch) { case '\t': { buffer_offset += TAB_SIZE - buffer_offset%TAB_SIZE; ch = ' '; break; } case '\n': { ++buffer_offset; ch = ' '; break; } case '-': { /* start of eol comment? */ if (*bufferp == '-') { /* yest, an eol comment */ skip_eol_comment(); ch = ' '; } else { ++buffer_offset; } break; } case '(': { /* start of long comment? */ if (*bufferp == '*') { /* yes, a long comment */ skip_long_comment(); ch = ' '; } else { ++buffer_offset; } break; } default: { ++buffer_offset; break; } } /* end switch */ } /* end get_char */ /***************************************************************************/ /***************************************************************************/ /* skip_comment() skip over a comment and set ch to 'end of comment' char */ skip_comment() { do { get_char(); } while ((ch != '}') && (ch != EOF_CHAR)); } /* end skip_coment */ /***************************************************************************/ /***************************************************************************/ /* skip_eol_comment() skip over an EXPRESS eol comment */ skip_eol_comment() { /* skip to the next line */ if (!get_source_line()) { ch = EOF_CHAR; return; } bufferp = source_buffer; buffer_offset = 0; return; } /* end SKIP_EOL_COMMENT */ /***************************************************************************/ /***************************************************************************/ /* skip_long_comment() skip over an EXPRESS long comment */ skip_long_comment() { BOOLEAN get_source_line(); num_opencom = 1; /* update buffer pointers to the * after the ( */ *bufferp++; ++buffer_offset; while (num_opencom > 0) { /* if current source line exhausted, get next line */ /* if file exhausted, set ch to EOF character and return */ if (*bufferp == EOS) { if (!get_source_line()) { ch = EOF_CHAR; return; } bufferp = source_buffer; buffer_offset = 0; } ch = *bufferp++; switch (ch) { case '(': { /* start of long comment? */ if (*bufferp == '*') { /* yes */ num_opencom++; *bufferp++; ++buffer_offset; } break; } case '*': { /* end of long comment? */ if (*bufferp == ')') { /* yes */ num_opencom--; *bufferp++; ++buffer_offset; } break; } default: { break; } } /* end switch */ ++buffer_offset; } /* end while */ return; } /* end SKIP_LONG_COMMENT */ /***************************************************************************/ /***************************************************************************/ /* skip_blanks skip past any blanks at current location in source */ /* buffer and set ch to next non-blank char */ skip_blanks() { while (ch == ' ') get_char(); } /* end skip_blanks */ /***************************************************************************/ /* TOKEN ROUTINES */ /* after a token has been extracted, ch is the first char after the token */ /***************************************************************************/ /* get_token extract next token from source buffer */ get_token() { entry_debug("get_token"); scan_source_debug(); skip_blanks(); tokenp = token_string; switch (char_code(ch)) { case LETTER: { get_word(); break; } case DIGIT: { get_number(); break; } case QUOTE: { get_string(); break; } case EOF_CODE: { token = END_OF_FILE; break; } default: { get_special(); break; } } /* end switch */ scan_token_debug(); /* for the interpeter: If parsing a block, crunch the token code */ /* and append it to the code buffer */ if (block_flag) crunch_token(); exit_debug("get_token"); } /* end get_token */ /***************************************************************************/ /***************************************************************************/ /* get_word extract a word, shift into single case. If not a reserved */ /* set token to IDENTIFIER */ /* for ltx2x, added underscore as an allowed word character */ get_word() { BOOLEAN is_reserved_word(); entry_debug("get_word"); scan_source_debug(); /* extract the word */ while ((char_code(ch) == LETTER) || (char_code(ch) == DIGIT) || (char_code(ch) == USCORE)) { *tokenp++ = ch; get_char(); } *tokenp = EOS; shift_word(); if (!is_reserved_word()) token = IDENTIFIER; exit_debug("get_word"); } /* end get_word */ /***************************************************************************/ /***************************************************************************/ /* shift_word() copy a word token into word_string with all letters */ /* in the same (lower? higher?) case */ shift_word() { int offset = 'a' - 'A'; /* offset to downshift a letter */ char *wp = word_string; char *tp = token_string; /* copy word into word_string, shifting as we go */ do { *wp++ = (*tp >= 'A') && (*tp <= 'Z') /* check for wrong case letter */ ? *tp + offset /* shift and copy */ : *tp; /* or just copy */ ++tp; } while (*tp != EOS); *wp = EOS; } /* end shift_word */ /***************************************************************************/ /***************************************************************************/ /* get_number extract a number token and set literal to its value. */ /* set token to NUMBER_LITERAL */ get_number() { int whole_count = 0; /* # of digits in whole part */ int decimal_offset = 0; /* # of digits to move decimal */ char exponent_sign = '+'; int exponent = 0; /* value of exponent */ XPRSAREAL nvalue = 0.0; /* value of number */ XPRSAREAL evalue = 0.0; /* value of exponent */ BOOLEAN saw_dotdot = FALSE; /* TRUE if found .. */ entry_debug("get_number"); digit_count = 0; count_error = FALSE; token = NO_TOKEN; literal.type = INTEGER_LIT; /* extract the whole part of the number */ accumulate_value(&nvalue, INVALID_NUMBER); if (token == ERROR) { exit_debug("get_number"); return; } whole_count = digit_count; /* if current char is a . then either start of fraction or .. */ if (ch == '.') { get_char(); if (ch == '.') { /* have a .. token, backup bufferp so that the token can be */ /* extracted next */ saw_dotdot = TRUE; --bufferp; } else { /* start of a fraction */ literal.type = REAL_LIT; *tokenp++ = '.'; /* accumulate fraction part */ accumulate_value(&nvalue, INVALID_FRACTION); if (token == ERROR) return; decimal_offset = whole_count - digit_count; } } /* extract the exponent part, if any. None if seen a .. */ if (!saw_dotdot && ((ch == 'E') || (ch == 'e'))) { literal.type = REAL_LIT; *tokenp++ = ch; get_char(); /* get sign, if any */ if ((ch == '+') || (ch == '-')) { *tokenp++ = exponent_sign = ch; get_char(); } /* extract the exponent and accumulate into evalue */ accumulate_value(&evalue, INVALID_EXPONENT); if (token == ERROR) return; if (exponent_sign == '-') evalue = -evalue; } /* Too many digits? */ if (count_error) { error(TOO_MANY_DIGITS); token = ERROR; exit_debug("get_number"); return; } /* adjust the number's value using decimal_offset and exponent */ exponent = evalue + decimal_offset; if ((exponent + whole_count < -MAX_EXPONENT) || (exponent + whole_count > MAX_EXPONENT)) { error(REAL_OUT_OF_RANGE); token = ERROR; exit_debug("get_number"); return; } if (exponent != 0) { nvalue = nvalue*pow(10.0, (double) exponent); } /* set the literal's value */ if (literal.type == INTEGER_LIT) { if ((nvalue < -MAX_INTEGER) || (nvalue > MAX_INTEGER)) { error(INTEGER_OUT_OF_RANGE); token = ERROR; exit_debug("get_number"); return; } literal.value.integer = (XPRSAINT) nvalue; } else { literal.value.real = nvalue; } *tokenp = EOS; token = NUMBER_LITERAL; exit_debug("get_number"); } /* end get_number */ /***************************************************************************/ /***************************************************************************/ /* get_string() Extract a string token. Set token to STRING_LITERAL. */ /* Quotes are stored as part of token_string, but not */ /* as part of literal.value.string */ get_string() { char *sp = literal.value.string; *tokenp++ = QUOTE_CHAR; get_char(); /* extract the string (two consecutive quotes represent a single quote) */ while (ch != EOF_CHAR) { if (ch == QUOTE_CHAR) { *tokenp++ = ch; get_char(); if (ch != QUOTE_CHAR) break; } *tokenp++ = ch; *sp++ = ch; get_char(); } *tokenp = EOS; *sp = EOS; token = STRING_LITERAL; literal.type = STRING_LIT; sprintf(dbuffer, "Scanned string: sp = %s, tokenp = %s\n", literal.value.string, token_string); debug_print(dbuffer); } /* end get_string */ /***************************************************************************/ /***************************************************************************/ /* get_special() Extract a special token. Most are single chars, but some */ /* are multiple. Set appropriate token value */ get_special() { *tokenp++ = ch; switch (ch) { case ')': token = RPAREN; get_char(); break; case '+': token = PLUS; get_char(); break; case '[': token = LBRACKET; get_char(); break; case ']': token = RBRACKET; get_char(); break; case ';': token = SEMICOLON; get_char(); break; case ',': token = COMMA; get_char(); break; case '.': token = PERIOD; get_char(); break; case '/': token = SLASH; get_char(); break; /* extra for EXPRESS */ case '{': token = LBRACE; get_char(); break; case '}': token = RBRACE; get_char(); break; case '?': token = QUERY_CHAR; get_char(); break; case '%': token = PERCENT; get_char(); break; case '\\': token = BACKSLASH; get_char(); break; case '@': token = COMMERCIAL_AT; get_char(); break; case '!': token = EXCLAMATION; get_char(); break; case '"': token = DOUBLEQUOTE; get_char(); break; case '*': { /* * (EXPRESS or ** or *) */ get_char(); if (ch == '*') { *tokenp++ = '*'; token = STARSTAR; get_char(); } else if (ch == ')') { *tokenp++ = ')'; token = STARPAREN; get_char(); } else { token = STAR; } break; } case '-': { /* - (EXPRESS or -- or -> ) */ get_char(); if (ch == '-') { *tokenp++ = '-'; token = MINUSMINUS; get_char(); } else if (ch == '>') { *tokenp++ = '>'; token = MINUSGT; get_char(); } else { token = MINUS; } break; } case '=': { /* = (EXPRESS == ) */ get_char(); if (ch == '=') { *tokenp++ = '='; token = EQUALEQUAL; get_char(); } else { token = EQUAL; } break; } case ':': { /* : or := (EXPRESS or :=: or :<>: ) */ get_char(); if (ch == '=') { *tokenp++ = '='; token = COLONEQUAL; get_char(); if (ch == ':') { *tokenp++ = ':'; token = COLONEQUALCOLON; get_char(); } } else if (ch == '<') { get_char(); if (ch == '>') { get_char(); if (ch == ':') { *tokenp++ = '<'; *tokenp++ = '>'; *tokenp++ = ':'; token = COLONNEQCOLON; get_token(); } } } else { token = COLON; } break; } case '<': { /* < or <= or <> (EXPRESS-I <- ) */ get_char(); if (ch == '=') { *tokenp++ = '='; token = LE; get_char(); } else if (ch == '>') { *tokenp++ = '>'; token = NE; get_char(); } else if (ch == '-') { *tokenp++ = '-'; token = LTMINUS; get_char(); } else { token = LT; } break; } case '>': { /* > or >= */ get_char(); if (ch == '=') { *tokenp++ = '='; token = GE; get_char(); } else token = GT; break; } case '(': { /* ( (EXPRESS (* ) */ get_char(); if (ch == '*') { *tokenp++ = '*'; token = PARENSTAR; get_char(); } else { token = LPAREN; } break; } case '|': { /* (EXPRESS | or || ) */ get_char(); if (ch == '|') { *tokenp++ = '|'; token = BARBAR; get_char(); } else { token = BAR; } break; } default: { token = ERROR; get_char(); break; } } /* end switch */ *tokenp = EOS; } /* end get_special */ /***************************************************************************/ /***************************************************************************/ /* accumulate_value(valuep, error_code) Extract a number part (digits) */ /* and accumulate its value. */ /* Error if first char not a digit */ accumulate_value(valuep, error_code) XPRSAREAL *valuep; ERROR_CODE error_code; { XPRSAREAL value = *valuep; /* error if first char not a digit */ if (char_code(ch) != DIGIT) { error(error_code); token = ERROR; return; } /* accumulate the value, provided not too many digits */ do { *tokenp++ = ch; if (++digit_count <= MAX_DIGIT_COUNT) value = 10*value + (ch - '0'); else count_error = TRUE; get_char(); } while (char_code(ch) == DIGIT); /* end do */ *valuep = value; } /* end accumulate_value */ /***************************************************************************/ /* TOKEN TESTERS */ /***************************************************************************/ /* token_in(token_list) If the current token is in token_list */ /* return TRUE, else FALSE. */ BOOLEAN token_in(token_list) TOKEN_CODE token_list[]; { TOKEN_CODE *atokenp; if (token_list == NULL) return(FALSE); for (atokenp = &token_list[0]; *atokenp; ++atokenp) { if (token == *atokenp) return(TRUE); } return(FALSE); } /* end */ /***************************************************************************/ /***************************************************************************/ /* synchronize(token_list1, token_list2, token_list3) If the current token */ /* is not in any of the lists, flag it as an error. Then skip tokens */ /* until one of those in the lists is found */ synchronize(token_list1, token_list2, token_list3) TOKEN_CODE token_list1[], token_list2[], token_list3[]; { BOOLEAN error_flag = (!token_in(token_list1)) && (!token_in(token_list2)) && (!token_in(token_list3)); if (error_flag) { /* error(token == END_OF_FILE ? UNEXPECTED_END_OF_FILE * : UNEXPECTED_TOKEN); */ if (token == END_OF_FILE) error(UNEXPECTED_END_OF_FILE); else if (token == ENDCODE) error(UNEXPECTED_ENDCODE); else error(UNEXPECTED_TOKEN); /* skip tokens to synchronize */ while ((!token_in(token_list1)) && (!token_in(token_list2)) && (!token_in(token_list3)) && ((token != END_OF_FILE) || (token != ENDCODE))) { get_token(); } } } /* end synchronize */ /***************************************************************************/ /***************************************************************************/ /* is_reserved_word() If token is a reserved word, set token and */ /* return TRUE, else return FALSE */ BOOLEAN is_reserved_word() { int word_length = strlen(word_string); RW_STRUCT *rwp; /* is length in range? */ if ((word_length >= MIN_RESERVED_WORD_LENGTH) && (word_length <= MAX_RESERVED_WORD_LENGTH)) { /* check in approp. word list */ for (rwp = rw_table[word_length]; rwp->string != NULL; ++rwp) { if (strcmp(word_string, rwp->string) == 0) { token = rwp->token_code; return(TRUE); } } } return(FALSE); } /* end is_reserved_word */ /***************************************************************************/ /* SOURCE FILE ROUTINES */ /***************************************************************************/ /* open_source_file(name) Open the named file and fetch the first char */ open_source_file(name) char *name; { if ((name == NULL) || ((source_file = fopen(name, "r")) == NULL)) { error(FAILED_SOURCE_FILE_OPEN); exit(-FAILED_SOURCE_FILE_OPEN); } /* get the first character */ bufferp = ""; get_char(); } /* end open_source_file */ /***************************************************************************/ /***************************************************************************/ /* close_source_file() */ close_source_file() { fclose(source_file); } /* end close_source_file */ /***************************************************************************/ /***************************************************************************/ /* get_source_line() Read the next line from the source file. If there is */ /* one, print it out and return TRUE. */ /* return FALSE for end of file */ BOOLEAN get_source_line() { char print_buffer[MAX_SOURCE_LINE_LENGTH + 9]; entry_debug("get_source_line"); if ((fgets(source_buffer, MAX_SOURCE_LINE_LENGTH, source_file)) != NULL) { ++line_number; if (print_flag) { sprintf(print_buffer, "%4d %d: %s", line_number, level, source_buffer); print_line(print_buffer); } exit_debug("get_source_line"); return(TRUE); } exit_debug("get_source_line"); return(FALSE); } /* end get_source_line */ /***************************************************************************/ /* PRINTOUT ROUTINES */ /***************************************************************************/ /* print_line(line) Print out a line. */ print_line(line) char line[]; { char save_ch; char *save_chp = NULL; entry_debug("print_line"); if (strlen(line) > MAX_PRINT_LINE_LENGTH) { save_chp = &line[MAX_PRINT_LINE_LENGTH]; save_ch = *save_chp; *save_chp = EOS; } /* fprintf(filout, "%s", line); */ /* ltx2x change, print to stdout, not o/p file */ /* printf("%s", line); just print to error file */ fprintf(ferr, "%s", line); /* PW addition -> error file */ if (save_chp) *save_chp = save_ch; exit_debug("print_line"); return; } /* end print_line */ /***************************************************************************/ /***************************************************************************/ /* tok_code_to_str(tok) Returns a string wrt a TOKEN_CODE */ char *tok_code_to_str(tok) TOKEN_CODE tok; { if ((tok >= NO_TOKEN) && (tok <= EOTC)) return(tok_code_strs[tok]); return(NULL); } /* end tok_code_to_str */ /***************************************************************************/