# Copyright 2010-2023 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

#######################################################
# From gawk manual
# ord.awk --- do ord and chr

# Global identifiers:
#    _ord_:        numerical values indexed by characters
#    _ord_init:    function to initialize _ord_

# Fill the _ord_ lookup table once at start-up so that ord() can be used
# from any later rule.
BEGIN {
    _ord_init()
}

# _ord_init --- populate the global _ord_ array, mapping each single
# character to its numeric code.
#
# The range of codes that is filled in depends on where the BEL
# character ("\a") lives in the running awk's character set:
#   - at 7:        plain ASCII, codes 0..127
#   - at 128 + 7:  ASCII with mark parity, codes 128..255
#   - elsewhere:   assumed EBCDIC, codes 0..255
#
# All parameters are local variables; the function takes no arguments.
function _ord_init(    first, last, code, bell)
{
    bell = "\a"

    # Start from the widest range (the EBCDIC case) and narrow it when
    # one of the ASCII layouts is detected.
    first = 0
    last = 255
    if (sprintf("%c", 7) == bell) {
        # regular ascii
        last = 127
    } else if (sprintf("%c", 128 + 7) == bell) {
        # ascii, mark parity
        first = 128
    }

    for (code = first; code <= last; code++)
        _ord_[sprintf("%c", code)] = code
}

# ord --- return the numeric code of the first character of STR, as
# recorded in the global _ord_ table.  Characters after the first are
# ignored.  FIRST is a local variable, not an argument.
function ord(str,    first)
{
    first = substr(str, 1, 1)
    return _ord_[first]
}

#######################################################


# Write one "#define CM_hex_XX CM_NAME" line to OUT for each punctuation
# or whitespace command that has a readable alias.  TABLE, ENTRY, COUNT
# and K are local variables.
function _emit_hex_aliases(out,    table, entry, count, k)
{
    # Each item is "<two hex digits> <alias name without the CM_ prefix>".
    count = split("09 TAB,0a NEWLINE,20 SPACE,21 EXCLAMATION_MARK," \
                  "22 POUND_SIGN,27 APOSTROPHE,2a ASTERISK,2c COMMA," \
                  "2d HYPHEN,2e FULL_STOP,2f SLASH,3a COLON,3d EQUALS," \
                  "3f QUESTION_MARK,40 AT_SIGN,5c BACKSLASH," \
                  "5e CIRCUMFLEX,60 BACKQUOTE,7b OPEN_BRACE," \
                  "7c VERTICAL_BAR,7d CLOSE_BRACE,7e TILDE", table, ",")
    for (k = 1; k <= count; k++) {
        split(table[k], entry, " ")
        print "#define CM_hex_" entry[1] " CM_" entry[2] > out
    }
}

# Set up the escape tables and output file names, then write the fixed
# preamble of the command ids header up to and including the first
# enumerator, CM_NONE.
BEGIN {
    # Map the escaped spelling of a command name, as it appears in the
    # input, to the actual character ...
    bs_escapes["\\\\"] = "\\"
    bs_escapes["\\\""] = "\""
    bs_escapes["\\n"] = "\n"
    bs_escapes["\\t"] = "\t"
    bs_escapes["\\f"] = "\f"
    bs_escapes["\\x20"] = " "

    # ... and build the reverse mapping from character to escaped spelling.
    for (v in bs_escapes)
        inv_bs_escapes[bs_escapes[v]] = v

    # Output files are created in srcdir, defaulting to the current
    # directory when srcdir was not set by the caller.
    if (srcdir == "")
        srcdir = "."
    CI = srcdir "/command_ids.h"
    CD = srcdir "/command_data.c"

    print "/* This file automatically generated by command_data.awk */" > CI
    print "#ifndef COMMAND_IDS_H" > CI
    print "#define COMMAND_IDS_H" > CI
    print "" > CI

    print "/* Useful aliases */" > CI
    _emit_hex_aliases(CI)
    print "" > CI

    print "/* Defined on MS-Windows */" > CI
    print "#undef CM_NONE" > CI
    print "" > CI

    # The enum is left open here; the remaining enumerators and the
    # closing brace are written after all input has been read.
    print "enum command_id {" > CI
    print "CM_NONE," > CI
    print "" > CI
}

# For every input line that is neither empty nor a "#" comment, record
# the command described on it.  Field 1 is the command name (possibly in
# an escaped spelling listed in bs_escapes), field 2 its flags, field 3
# its data and field 4 its number of arguments.
!(/^$/ || /^#/) {
    # Store the command under the real character when the name was
    # written as a backslash escape.
    c = ($1 in bs_escapes) ? bs_escapes[$1] : $1

    commands[c] = $2
    data[c]     = $3
    args_nr[c]  = $4
}

# Emit the command table to CD and the remaining enumerators to CI, one
# entry per command recorded while reading the input, then terminate
# both files.
#
# Each table entry is:  "name", flags, data, number-of-arguments,
# and the enumerators are written in the same order as the table entries.
END {
    # Flags that are filtered out because they are not relevant for the
    # XS parser.  A lookup array with exact string matching is used, and
    # not a regexp, because word boundary matching does not seem to be
    # portable and we want to be sure to match correctly even if an
    # ignored flag is a substring of another flag.
    ignored_count = split("letter_no_arg inline_format inline_conditional " \
                          "in_index in_def brace_code explained " \
                          "formatted_line formatted_nobrace " \
                          "formattable_line non_formatted_block preamble", \
                          ignored_names, " ")
    for (flag_idx = 1; flag_idx <= ignored_count; flag_idx++) {
        xs_ignored_flag[ignored_names[flag_idx]] = 1
    }

    print "COMMAND builtin_command_data[] = {" > CD

    # Entry 0 of the table corresponds to CM_NONE.
    print "0, 0, 0, 0," > CD

    # We want the output sorted so we can use bsearch.
    # NOTE: PROCINFO["sorted_in"] is a gawk feature; other awks ignore it
    # and would traverse commands in an unspecified order.
    PROCINFO["sorted_in"]="@ind_str_asc"
    for (c in commands) {
        # Single character commands with unusual names: the enumerator is
        # named after the character code, and the C string uses the
        # escaped spelling when there is one.
        if (c ~ /^[^[:alpha:]]$/) {
            c2 = (c in inv_bs_escapes) ? inv_bs_escapes[c] : c
            printf "CM_hex_%02x,\n", ord(c) > CI
        } else {
            c2 = c
            print "CM_" c "," > CI
        }

        # Reset for every command, so that a command without flags does
        # not inherit the flags computed for the previous command.
        flags_str = ""
        if (commands[c] != "") {
            # Walk the flags by position using the count returned by
            # split() instead of a for-in loop: the first flag must be
            # handled first, and for-in gives no such guarantee in
            # general.
            flag_count = split(commands[c], flags_array, ",")
            for (flag_idx = 1; flag_idx <= flag_count; flag_idx++) {
                flag = flags_array[flag_idx]
                if (flag_idx == 1) {
                    # first flag is always kept, corresponds to the category
                    flags_str = flag
                    # all the line and block commands have the
                    # no_paragraph flag
                    if (flag == "line" || flag == "block") {
                        flags_str = flags_str "," "no_paragraph"
                    }
                } else if (!(flag in xs_ignored_flag)) {
                    flags_str = flags_str "," flag
                }
            }
        }

        # Turn "a,b,c" into the C expression "CF_a | CF_b | CF_c".
        if (flags_str == "") {
            flags = "0"
        } else {
            flags = "CF_" flags_str
            gsub (/,/, " | CF_", flags)
        }

        if (data[c] != "") {
            command_data = data[c]
        } else {
            command_data = "0"
        }

        if (args_nr[c] != "") {
            args_nr_data = args_nr[c]
        } else {
            # No explicit number of arguments in the input: default to 0
            # when the flags contain "block", when the data starts with
            # NOBRACE_ (only checked if the command has flags), or when
            # the data is BRACE_noarg or LINE_lineraw; otherwise to 1.
            where = 0
            if (commands[c] != "") {
                where = match(commands[c], /block/)
                if (where == 0) {
                    where = match(command_data, /^NOBRACE_/)
                }
            }
            if (where != 0 || command_data == "BRACE_noarg" || command_data == "LINE_lineraw" ) {
                args_nr_data = "0"
            } else {
                args_nr_data = "1"
            }
        }
        print "\"" c2 "\", " flags ", " command_data ", " args_nr_data "," > CD
    }
    print "};" > CD
    print "};" > CI
    print "#endif" > CI

    # Flush and release both generated files explicitly.
    close(CD)
    close(CI)
}