-- The semantic analysis step of static analysis determines the meaning of the different function calls.

local lexical_analysis = require("explcheck-lexical-analysis")
local syntactic_analysis = require("explcheck-syntactic-analysis")
local get_option = require("explcheck-config").get_option
local ranges = require("explcheck-ranges")
local parsers = require("explcheck-parsers")
local identity = require("explcheck-utils").identity

local get_token_byte_range = lexical_analysis.get_token_byte_range
local is_token_simple = lexical_analysis.is_token_simple
local token_types = lexical_analysis.token_types

local extract_text_from_tokens = syntactic_analysis.extract_text_from_tokens

local CONTROL_SEQUENCE = token_types.CONTROL_SEQUENCE

local new_range = ranges.new_range
local range_flags = ranges.range_flags

local INCLUSIVE = range_flags.INCLUSIVE
local MAYBE_EMPTY = range_flags.MAYBE_EMPTY

local call_types = syntactic_analysis.call_types
local get_calls = syntactic_analysis.get_calls
local get_call_token_range = syntactic_analysis.get_call_token_range
local transform_replacement_text_tokens = syntactic_analysis.transform_replacement_text_tokens

local CALL = call_types.CALL
local OTHER_TOKENS = call_types.OTHER_TOKENS

local lpeg = require("lpeg")

-- Human-readable labels for the kinds of statements that the semantic analysis distinguishes.
-- The labels are stored in the `type` attribute of recorded statements.
local statement_types = {
  FUNCTION_DEFINITION = "function definition",
  FUNCTION_VARIANT_DEFINITION = "function variant definition",
  OTHER_STATEMENT = "other statement",
  OTHER_TOKENS_SIMPLE = "block of other simple tokens",
  OTHER_TOKENS_COMPLEX = "block of other complex tokens",
}

-- Short local aliases for the statement types.
local FUNCTION_DEFINITION = statement_types.FUNCTION_DEFINITION
local FUNCTION_VARIANT_DEFINITION = statement_types.FUNCTION_VARIANT_DEFINITION
local OTHER_STATEMENT = statement_types.OTHER_STATEMENT
local OTHER_TOKENS_SIMPLE = statement_types.OTHER_TOKENS_SIMPLE
local OTHER_TOKENS_COMPLEX = statement_types.OTHER_TOKENS_COMPLEX

-- Labels that further refine a statement type; they are keyed by the name of the refined statement type
-- and stored in the `subtype` attribute of recorded statements.
local statement_subtypes = {
  FUNCTION_DEFINITION = {
    DIRECT = "direct function definition",
    INDIRECT = "indirect function definition",
  }
}

-- Short local aliases for the function definition subtypes.
local FUNCTION_DEFINITION_DIRECT = statement_subtypes.FUNCTION_DEFINITION.DIRECT
local FUNCTION_DEFINITION_INDIRECT = statement_subtypes.FUNCTION_DEFINITION.INDIRECT

-- Numeric levels of confidence about the correctness of extracted information.
-- The levels are plain numbers, so that several of them can be combined with `math.min()`.
local statement_confidences = {
  DEFINITELY = 1,
  MAYBE = 0.5,
  NONE = 0,
}

-- Short local aliases for the confidence levels.
local DEFINITELY = statement_confidences.DEFINITELY
local MAYBE = statement_confidences.MAYBE
local NONE = statement_confidences.NONE

-- Kinds of control sequence name descriptions: TEXT descriptions carry a string payload,
-- whereas PATTERN descriptions carry an LPeg pattern payload.
local csname_types = {
  TEXT = "direct text representation of a control sequence name or its part, usually paired with confidence DEFINITELY",
  PATTERN = "a PEG pattern that recognizes different control sequences or their parts, usually paired with confidence MAYBE"
}

-- Short local aliases for the control sequence name types.
local TEXT = csname_types.TEXT
local PATTERN = csname_types.PATTERN

-- Determine the meaning of function calls and register any issues.
local function semantic_analysis(pathname, content, issues, results, options)

  -- Determine the type of a span of tokens as either "simple text" [1, p. 383] with no expected side effects or
  -- a more complex material that may have side effects and presents a boundary between chunks of well-understood
  -- expl3 statements.
  --
  --  [1]: Donald Ervin Knuth. 1986. TeX: The Program. Addison-Wesley, USA.
  --
  local function classify_tokens(tokens, token_range)
    -- Assume simple material until a single complex token proves otherwise.
    local statement_type = OTHER_TOKENS_SIMPLE
    for _, token in token_range:enumerate(tokens) do
      if not is_token_simple(token) then
        statement_type = OTHER_TOKENS_COMPLEX  -- one complex token makes the whole span complex
        break
      end
    end
    return statement_type
  end

  -- Extract statements from function calls and record them. For all identified function definitions, also record replacement texts.
  local function record_statements_and_replacement_texts(tokens, transformed_tokens, calls, first_map_back, first_map_forward)
    local statements = {}
    local replacement_text_tokens = {}
    for call_number, call in ipairs(calls) do

      local call_range = new_range(call_number, call_number, INCLUSIVE, #calls)
      local byte_range = call.token_range:new_range_from_subranges(get_token_byte_range(tokens), #content)

      -- Try and convert tokens from an argument into a text.
      local function extract_text_from_argument(argument)
        -- The specifier of the argument must be accepted by the n-type argument specifier parser.
        local has_n_type_specifier = lpeg.match(parsers.n_type_argument_specifier, argument.specifier) ~= nil
        assert(has_n_type_specifier)
        -- The token range of the argument refers to the original tokens, hence the forward mapping.
        return extract_text_from_tokens(argument.token_range, transformed_tokens, first_map_forward)
      end

      -- Extract the name of a control sequence from a call argument.
      local function extract_csname_from_argument(argument)
        local specifier = argument.specifier
        -- Only N-type and c-type arguments can name a control sequence, give up on anything else.
        if specifier ~= "N" and specifier ~= "c" then
          return nil
        end

        local name
        if specifier == "N" then
          -- An N-type argument is a single token, which must be a control sequence.
          local token = transformed_tokens[first_map_forward(argument.token_range:start())]
          if token.type ~= CONTROL_SEQUENCE then
            return nil
          end
          name = token.payload
        else
          -- A c-type argument spells the name out as text; complex material cannot be converted to text.
          name = extract_text_from_argument(argument)
          if name == nil then
            return nil
          end
        end
        assert(name ~= nil)
        return name
      end

      -- Split an expl3 control sequence name to a stem and the argument specifiers.
      local function parse_expl3_csname(csname)
        -- The stem is the text before the first colon and the argument specifiers are the text
        -- between the first colon and either the next colon or the end of the name.
        -- Both results are nil when the name contains no colon.
        local stem, specifiers = csname:match("([^:]*):([^:]*)")
        return stem, specifiers
      end

      -- Determine whether a function is private or public based on its name.
      local function is_function_private(csname)
        -- A name is considered private when it starts with two underscores;
        -- the search is a plain-text one, so that no character is treated as pattern syntax.
        local first_position = csname:find("__", 1, true)
        return first_position == 1
      end

      -- Replace the argument specifiers in an expl3 control sequence name.
      local function replace_argument_specifiers(csname_stem, argument_specifiers)
        -- Parameters:
        --   csname_stem: the part of the control sequence name before the colon
        --   argument_specifiers: either a string, or a table with the fields `payload`, `transcript`, and `type`,
        --     where `type` is either TEXT (string payload) or PATTERN (LPeg pattern payload)
        -- Returns a string for string argument specifiers and a table with the fields `payload`, `transcript`,
        -- and `type` otherwise. Raises an error for tables of any other type.
        if type(argument_specifiers) == 'string' then
          return string.format("%s:%s", csname_stem, argument_specifiers)
        end

        local transcript = string.format("%s:%s", csname_stem, argument_specifiers.transcript)
        local specifiers_type = argument_specifiers.type
        local payload
        if specifiers_type == TEXT then
          payload = string.format("%s:%s", csname_stem, argument_specifiers.payload)
        elseif specifiers_type == PATTERN then
          payload = lpeg.P(csname_stem) * lpeg.P(":") * argument_specifiers.payload
        else
          -- Convert the type to a string first, so that a missing or non-string type is reported
          -- with this message rather than with a failed concatenation.
          error('Unexpected argument specifiers type "' .. tostring(specifiers_type) .. '"')
        end
        return {
          payload = payload,
          transcript = transcript,
          type = specifiers_type,
        }
      end

      -- Determine the control sequence name of a conditional function given a base control sequence name and a condition.
      local function get_conditional_function_csname(csname_stem, argument_specifiers, condition)
        -- Parameters:
        --   csname_stem: the part of the base control sequence name before the colon
        --   argument_specifiers: either a string, or a table with the fields `payload`, `transcript`, and `type`,
        --     where `type` is either TEXT (string payload) or PATTERN (LPeg pattern payload)
        --   condition: one of "p", "T", "F", and "TF"
        -- Returns a string for string argument specifiers and a table with the fields `payload`, `transcript`,
        -- and `type` otherwise. Raises an error for any other condition or type of argument specifiers.

        -- All conditional names share the shape <stem><separator><specifiers><suffix>.
        local separator, suffix
        if condition == "p" then  -- predicate function: <stem>_p:<specifiers>
          separator, suffix = "_p:", ""
        elseif condition == "T" or condition == "F" or condition == "TF" then  -- branching function: <stem>:<specifiers><condition>
          separator, suffix = ":", condition
        else
          -- Convert the condition to a string first, so that a missing or non-string condition is reported
          -- with this message rather than with a failed concatenation.
          error('Unexpected condition "' .. tostring(condition) .. '"')
        end

        -- Produce the textual form of the name for a textual form of the argument specifiers.
        local function format_csname(specifiers_text)
          return string.format("%s%s%s%s", csname_stem, separator, specifiers_text, suffix)
        end

        if type(argument_specifiers) == 'string' then
          return format_csname(argument_specifiers)
        end

        local transcript = format_csname(argument_specifiers.transcript)
        local specifiers_type = argument_specifiers.type
        if specifiers_type == TEXT then
          return {
            payload = format_csname(argument_specifiers.payload),
            transcript = transcript,
            type = TEXT,
          }
        elseif specifiers_type == PATTERN then
          local payload = lpeg.P(csname_stem) * lpeg.P(separator) * argument_specifiers.payload
          if suffix ~= "" then  -- predicate functions have no suffix after the argument specifiers
            payload = payload * lpeg.P(suffix)
          end
          return {
            payload = payload,
            transcript = transcript,
            type = PATTERN,
          }
        else
          error('Unexpected argument specifiers type "' .. tostring(specifiers_type) .. '"')
        end
      end

      -- Try and extract a list of conditions in a conditional function (variant) definition.
      -- Together with the conditions, include a measurement of confidence about the correctness of the extracted information.
      local function parse_conditions(argument)
        -- Returns a list of {condition, confidence} pairs, or nil when the conditions could be read but not parsed.

        -- Whenever the conditions cannot be read, assume all possible conditions with lower confidence.
        local function assume_all_conditions()
          return {{"p", MAYBE}, {"T", MAYBE}, {"F", MAYBE}, {"TF", MAYBE}}
        end

        if argument.specifier ~= "n" then  -- conditions are hidden behind expansion
          return assume_all_conditions()
        end

        local conditions_text = extract_text_from_argument(argument)
        if conditions_text == nil then  -- failed to read conditions
          return assume_all_conditions()
        end

        local condition_list = lpeg.match(parsers.conditions, conditions_text)
        if condition_list == nil then  -- could not parse conditions, give up
          return nil
        end

        -- The conditions were spelled out explicitly, so we are certain about each of them.
        local conditions = {}
        for _, condition in ipairs(condition_list) do
          conditions[#conditions + 1] = {condition, DEFINITELY}
        end
        return conditions
      end

      -- Try and extract a list of variant argument specifiers in a (conditional) function variant definition.
      -- Together with the argument specifiers, include a measurement of confidence about the correctness of the extracted information.
      local function parse_variant_argument_specifiers(csname, argument)
        -- Parameters:
        --   csname: the name of the base function, whose argument specifiers the variants are checked against
        --   argument: the call argument that lists the variant argument specifiers
        -- Returns a list of tables with the fields `payload`, `transcript`, `type`, and `confidence`, or nil when
        -- the base csname can't be parsed or when some variant is incompatible with the base function.
        -- Side effects: may record issues t403 (incompatible variant) and w410 (deprecated variant) in `issues`.

        -- extract the argument specifiers from the csname
        local _, base_argument_specifiers = parse_expl3_csname(csname)
        if base_argument_specifiers == nil then
          return nil  -- we couldn't parse the csname, give up
        end

        local variant_argument_specifiers

        -- try to determine all sets of variant argument specifiers
        local variant_argument_specifiers_text, variant_argument_specifiers_list
        if argument.specifier ~= "n" then  -- specifiers are hidden behind expansion, assume all possibilities with lower confidence
          goto unknown_argument_specifiers
        end
        variant_argument_specifiers_text = extract_text_from_argument(argument)
        if variant_argument_specifiers_text == nil then  -- failed to read specifiers
          goto unknown_argument_specifiers  -- assume all specifiers with lower confidence
        end
        variant_argument_specifiers_list = lpeg.match(parsers.variant_argument_specifiers, variant_argument_specifiers_text)
        if variant_argument_specifiers_list == nil then  -- could not parse specifiers, assume all possibilities with lower confidence
          goto unknown_argument_specifiers
        end
        -- the specifiers were spelled out explicitly, validate every set of them against the base specifiers
        variant_argument_specifiers = {}
        for _, argument_specifiers in ipairs(variant_argument_specifiers_list) do
          if #argument_specifiers ~= #base_argument_specifiers then
            if #argument_specifiers < #base_argument_specifiers then  -- variant argument specifiers are shorter than base specifiers
              argument_specifiers = string.format(
                "%s%s",  -- treat the variant specifiers as a prefix with the rest filled in with the base specifiers
                argument_specifiers, base_argument_specifiers:sub(#argument_specifiers + 1)
              )
            else  -- variant argument specifiers are longer than base specifiers
              issues:add("t403", "function variant of incompatible type", byte_range)
              return nil  -- give up
            end
          end
          assert(#argument_specifiers == #base_argument_specifiers)
          -- compare the variant and base argument specifiers position by position
          for i = 1, #argument_specifiers do
            local base_argument_specifier = base_argument_specifiers:sub(i, i)
            local argument_specifier = argument_specifiers:sub(i, i)
            if base_argument_specifier == argument_specifier then  -- variant argument specifier is same as base argument specifier
              goto continue  -- skip further checks
            end
            local any_compatible_specifier = false
            -- NOTE(review): the result of lpeg.match() is passed straight to ipairs(), which presumably relies on
            -- the parser matching every base argument specifier -- confirm against the definition of the parser
            for _, compatible_specifier in ipairs(lpeg.match(parsers.compatible_argument_specifiers, base_argument_specifier)) do
              if argument_specifier == compatible_specifier then  -- variant argument specifier is compatible with base argument specifier
                any_compatible_specifier = true
                break  -- skip further checks
              end
            end
            if not any_compatible_specifier then
              local any_deprecated_specifier = false
              for _, deprecated_specifier in ipairs(lpeg.match(parsers.deprecated_argument_specifiers, base_argument_specifier)) do
                if argument_specifier == deprecated_specifier then  -- variant argument specifier is deprecated regarding the base specifier
                  any_deprecated_specifier = true
                  break  -- skip further checks
                end
              end
              if any_deprecated_specifier then
                -- a deprecated variant only produces a warning and the variant is still recorded
                issues:add("w410", "function variant of deprecated type", byte_range)
              else
                issues:add("t403", "function variant of incompatible type", byte_range)
                return nil  -- variant argument specifier is incompatible with base argument specifier, give up
              end
            end
            ::continue::
          end
          -- record the validated (and possibly padded) variant argument specifiers as text that we are certain about
          table.insert(variant_argument_specifiers, {
            payload = argument_specifiers,
            transcript = argument_specifiers,
            type = TEXT,
            confidence = DEFINITELY
          })
        end
        goto done_parsing

        ::unknown_argument_specifiers::
        -- assume all possible sets of variant argument specifiers with lower confidence
        do
          variant_argument_specifiers = {}
          -- build a single pattern that accepts, at every position, any specifier compatible with the base specifier
          local compatible_specifier_pattern, compatible_specifier_transcripts = parsers.success, {}
          for i = 1, #base_argument_specifiers do
            local base_argument_specifier = base_argument_specifiers:sub(i, i)
            local compatible_specifiers = table.concat(lpeg.match(parsers.compatible_argument_specifiers, base_argument_specifier))
            compatible_specifier_pattern = compatible_specifier_pattern * lpeg.S(compatible_specifiers)
            -- describe the position as a bracketed set of specifiers in the human-readable transcript
            local compatible_specifier_transcript = string.format('[%s]', compatible_specifiers)
            table.insert(compatible_specifier_transcripts, compatible_specifier_transcript)
          end
          local compatible_specifiers_transcript = table.concat(compatible_specifier_transcripts)
          table.insert(variant_argument_specifiers, {
            payload = compatible_specifier_pattern,
            transcript = compatible_specifiers_transcript,
            type = PATTERN,
            confidence = MAYBE
          })
        end

        ::done_parsing::
        return variant_argument_specifiers
      end

      if call.type == CALL then  -- a function call
        -- Ignore error S204 (Missing stylistic whitespaces) in Lua code.
        for _, arguments_number in ipairs(lpeg.match(parsers.expl3_function_call_with_lua_code_argument_csname, call.csname)) do
          local lua_code_argument = call.arguments[arguments_number]
          if #lua_code_argument.token_range > 0 then
            local lua_code_byte_range = lua_code_argument.token_range:new_range_from_subranges(get_token_byte_range(tokens), #content)
            issues:ignore('s204', lua_code_byte_range)
          end
        end

        local function_variant_definition = lpeg.match(parsers.expl3_function_variant_definition_csname, call.csname)
        local function_definition = lpeg.match(parsers.expl3_function_definition_csname, call.csname)

        -- Process a function variant definition.
        if function_variant_definition ~= nil then
          local is_conditional = table.unpack(function_variant_definition)
          -- determine the name of the defined function
          local base_csname_argument = call.arguments[1]
          local base_csname = extract_csname_from_argument(base_csname_argument)
          if base_csname == nil then  -- we couldn't extract the csname, give up
            goto other_statement
          end
          local base_csname_stem, base_argument_specifiers = parse_expl3_csname(base_csname)
          if base_csname_stem == nil then  -- we couldn't parse the csname, give up
            goto other_statement
          end
          -- determine the variant argument specifiers
          local variant_argument_specifiers = parse_variant_argument_specifiers(base_csname, call.arguments[2])
          if variant_argument_specifiers == nil then  -- we couldn't parse the variant argument specifiers, give up
            goto other_statement
          end
          -- determine all defined csnames
          local defined_csnames = {}
          for _, argument_specifiers in ipairs(variant_argument_specifiers) do
            if is_conditional then  -- conditional function
              -- determine the conditions
              local conditions = parse_conditions(call.arguments[#call.arguments])
              if conditions == nil then  -- we couldn't determine the conditions, give up
                goto other_statement
              end
              -- determine the defined csnames
              for _, condition_table in ipairs(conditions) do
                local condition, condition_confidence = table.unpack(condition_table)
                local base_conditional_csname = get_conditional_function_csname(base_csname_stem, base_argument_specifiers, condition)
                local defined_conditional_csname = get_conditional_function_csname(base_csname_stem, argument_specifiers, condition)
                local confidence = math.min(argument_specifiers.confidence, condition_confidence)
                if base_conditional_csname ~= defined_conditional_csname then
                  table.insert(defined_csnames, {base_conditional_csname, defined_conditional_csname, confidence})
                end
              end
            else  -- non-conditional function
              local defined_csname = replace_argument_specifiers(base_csname_stem, argument_specifiers)
              if base_csname ~= defined_csname then
                table.insert(defined_csnames, {base_csname, defined_csname, argument_specifiers.confidence})
              end
            end
          end
          -- record function variant definition statements for all effectively defined csnames
          for _, defined_csname_table in ipairs(defined_csnames) do  -- lua
            local effective_base_csname, defined_csname, confidence = table.unpack(defined_csname_table)
            local statement = {
              type = FUNCTION_VARIANT_DEFINITION,
              call_range = call_range,
              confidence = confidence,
              -- The following attributes are specific to the type.
              base_csname = effective_base_csname,
              defined_csname = defined_csname,
              is_private = is_function_private(base_csname),
              is_conditional = is_conditional,
            }
            table.insert(statements, statement)
          end
          goto continue
        end

        -- Process a function definition.
        if function_definition ~= nil then
          local is_direct = table.unpack(function_definition)
          -- Process a direct function definition.
          if is_direct then
            -- determine the properties of the defined function
            local _, _, is_creator_function = table.unpack(function_definition)
            local is_conditional, maybe_redefinition, is_global, is_protected, is_nopar
            local defined_csname_argument, num_parameters
            if is_creator_function == true then  -- direct application of a creator function
              defined_csname_argument = call.arguments[1]
              _, is_conditional, _, maybe_redefinition, is_global, is_protected, is_nopar = table.unpack(function_definition)
            else  -- indirect application of a creator function
              defined_csname_argument = call.arguments[2]
              local num_parameter_argument = call.arguments[3]
              if num_parameter_argument ~= nil and num_parameter_argument.specifier == "n" then
                local num_parameters_text = extract_text_from_argument(num_parameter_argument)
                if num_parameters_text ~= nil then
                  num_parameters = tonumber(num_parameters_text)
                end
              end
              local creator_function_csname = extract_csname_from_argument(call.arguments[1])
              if creator_function_csname == nil then  -- couldn't determine the name of the creator function, give up
                goto other_statement
              end
              local actual_function_definition = lpeg.match(parsers.expl3_function_definition_csname, creator_function_csname)
              if actual_function_definition == nil then  -- couldn't understand the creator function, give up
                goto other_statement
              end
              _, is_conditional, _, maybe_redefinition, is_global, is_protected, is_nopar = table.unpack(actual_function_definition)
            end
            -- determine the name of the defined function
            local defined_csname = extract_csname_from_argument(defined_csname_argument)
            if defined_csname == nil then  -- we couldn't extract the csname, give up
              goto other_statement
            end
            local defined_csname_stem, argument_specifiers = parse_expl3_csname(defined_csname)
            -- determine the replacement text
            local replacement_text_number
            local replacement_text_argument = call.arguments[#call.arguments]
            do
              if replacement_text_argument.specifier ~= "n" then  -- replacement text is hidden behind expansion
                goto skip_replacement_text  -- record partial information
              end
              -- determine the number of parameters of the defined function
              local function update_num_parameters(updated_num_parameters)
                assert(updated_num_parameters ~= nil)
                if num_parameters == nil or updated_num_parameters > num_parameters then  -- trust the highest guess
                  num_parameters = updated_num_parameters
                end
              end
              if argument_specifiers ~= nil and lpeg.match(parsers.N_or_n_type_argument_specifiers, argument_specifiers) ~= nil then
                update_num_parameters(#argument_specifiers)
              end
              for _, argument in ipairs(call.arguments) do  -- next, try to look for p-type "TeX parameter" argument specifiers
                if argument.specifier == "p" and argument.num_parameters ~= nil then
                  update_num_parameters(argument.num_parameters)
                  break
                end
              end
              if num_parameters == nil then  -- we couldn't determine the number of parameters
                goto skip_replacement_text  -- record partial information
              end
              -- parse the replacement text and record the function definition
              local mapped_replacement_text_token_range = new_range(
                first_map_forward(replacement_text_argument.token_range:start()),
                first_map_forward(replacement_text_argument.token_range:stop()),
                INCLUSIVE + MAYBE_EMPTY,
                #transformed_tokens
              )
              local doubly_transformed_tokens, second_map_back, second_map_forward = transform_replacement_text_tokens(
                content,
                transformed_tokens,
                issues,
                num_parameters,
                mapped_replacement_text_token_range
              )
              if doubly_transformed_tokens == nil then  -- we couldn't parse the replacement text
                goto skip_replacement_text  -- record partial information
              end
              local function map_back(...) return first_map_back(second_map_back(...)) end
              local function map_forward(...) return second_map_forward(first_map_forward(...)) end
              table.insert(replacement_text_tokens, {
                token_range = replacement_text_argument.token_range,
                transformed_tokens = doubly_transformed_tokens,
                map_back = map_back,
                map_forward = map_forward,
              })
              replacement_text_number = #replacement_text_tokens
            end
            ::skip_replacement_text::
            -- determine all effectively defined csnames
            local effectively_defined_csnames = {}
            if is_conditional then  -- conditional function
              -- determine the conditions
              local conditions = parse_conditions(call.arguments[#call.arguments - 1])
              if conditions == nil then  -- we couldn't determine the conditions, give up
                goto other_statement
              end
              -- determine the defined csnames
              for _, condition_table in ipairs(conditions) do
                local condition, confidence = table.unpack(condition_table)
                if condition == "p" and is_protected then
                  issues:add("e404", "protected predicate function", byte_range)
                end
                local effectively_defined_csname = get_conditional_function_csname(defined_csname_stem, argument_specifiers, condition)
                table.insert(effectively_defined_csnames, {effectively_defined_csname, confidence})
              end
            else  -- non-conditional function
              effectively_defined_csnames = {{defined_csname, DEFINITELY}}
            end
            -- record function definition statements for all effectively defined csnames
            for _, effectively_defined_csname_table in ipairs(effectively_defined_csnames) do  -- lua
              local effectively_defined_csname, confidence = table.unpack(effectively_defined_csname_table)
              local statement = {
                type = FUNCTION_DEFINITION,
                call_range = call_range,
                confidence = confidence,
                -- The following attributes are specific to the type.
                subtype = FUNCTION_DEFINITION_DIRECT,
                maybe_redefinition = maybe_redefinition,
                is_private = is_function_private(defined_csname),
                is_global = is_global,
                defined_csname = effectively_defined_csname,
                -- The following attributes are specific to the subtype.
                is_conditional = is_conditional,
                is_protected = is_protected,
                is_nopar = is_nopar,
                replacement_text_number = replacement_text_number,
                replacement_text_argument = replacement_text_argument,
              }
              table.insert(statements, statement)
            end
          else
            -- Process an indirect function definition.
            local _, is_conditional, maybe_redefinition, is_global = table.unpack(function_definition)
            -- determine the name of the defined function
            local defined_csname_argument = call.arguments[1]
            local defined_csname = extract_csname_from_argument(defined_csname_argument)
            if defined_csname == nil then  -- we couldn't extract the csname, give up
              goto other_statement
            end
            -- determine the name of the base function
            local base_csname_argument = call.arguments[2]
            local base_csname = extract_csname_from_argument(base_csname_argument)
            if base_csname == nil then  -- we couldn't extract the csname, give up
              goto other_statement
            end
            -- determine all effectively defined csnames and effective base csnames
            local effective_defined_and_base_csnames = {}
            if is_conditional then  -- conditional function
              -- parse the base and defined csnames
              local defined_csname_stem, defined_argument_specifiers = parse_expl3_csname(defined_csname)
              if defined_csname_stem == nil then  -- we couldn't parse the defined csname, give up
                goto other_statement
              end
              local base_csname_stem, base_argument_specifiers = parse_expl3_csname(base_csname)
              if base_csname_stem == nil then  -- we couldn't parse the base csname, give up
                goto other_statement
              end
              -- determine the conditions
              local conditions = parse_conditions(call.arguments[#call.arguments - 1])
              if conditions == nil then  -- we couldn't determine the conditions, give up
                goto other_statement
              end
              -- determine the defined and base csnames
              for _, condition_table in ipairs(conditions) do
                local condition, confidence = table.unpack(condition_table)
                local effectively_defined_csname
                  = get_conditional_function_csname(defined_csname_stem, defined_argument_specifiers, condition)
                local effective_base_csname
                  = get_conditional_function_csname(base_csname_stem, base_argument_specifiers, condition)
                table.insert(effective_defined_and_base_csnames, {effectively_defined_csname, effective_base_csname, confidence})
              end
            else  -- non-conditional function
              effective_defined_and_base_csnames = {{defined_csname, base_csname, DEFINITELY}}
            end
            -- record function definition statements for all effectively defined csnames
            for _, effective_defined_and_base_csname_table in ipairs(effective_defined_and_base_csnames) do  -- lua
              local effectively_defined_csname, effective_base_csname, confidence
                = table.unpack(effective_defined_and_base_csname_table)
              local statement = {
                type = FUNCTION_DEFINITION,
                call_range = call_range,
                confidence = confidence,
                -- The following attributes are specific to the type.
                subtype = FUNCTION_DEFINITION_INDIRECT,
                maybe_redefinition = maybe_redefinition,
                is_private = is_function_private(defined_csname),
                is_global = is_global,
                defined_csname = effectively_defined_csname,
                -- The following attributes are specific to the subtype.
                base_csname = effective_base_csname,
                is_conditional = is_conditional,
              }
              table.insert(statements, statement)
            end
          end
          goto continue
        end

        ::other_statement::
        local statement = {
          type = OTHER_STATEMENT,
          call_range = call_range,
          confidence = NONE,
        }
        table.insert(statements, statement)
      elseif call.type == OTHER_TOKENS then  -- other tokens
        local statement_type = classify_tokens(tokens, call.token_range)
        local statement = {
          type = statement_type,
          call_range = call_range,
          confidence = NONE,
        }
        table.insert(statements, statement)
      else
        error('Unexpected call type "' .. call.type .. '"')
      end
      ::continue::
    end
    return statements, replacement_text_tokens
  end

  -- Extract statements from function calls. For all identified function definitions, record replacement texts and recursively
  -- apply syntactic and semantic analysis on them.
  --
  -- Receives the tokens, groupings, and calls of a single part and returns two values:
  --
  -- 1. the top-level statements of the part, and
  -- 2. a table with the parallel lists "tokens", "calls", "statements", and "nesting_depth", all of which are indexed by
  --    the absolute number of a replacement text.
  local function get_statements(tokens, groupings, calls)

    -- First, record top-level statements.
    local replacement_texts = {tokens = nil, calls = {}, statements = {}, nesting_depth = {}}
    local statements
    statements, replacement_texts.tokens = record_statements_and_replacement_texts(tokens, tokens, calls, identity, identity)

    -- Then, process any new replacement texts until convergence.
    local previous_num_replacement_texts = 0
    local current_num_replacement_texts = #replacement_texts.tokens
    local current_nesting_depth = 1
    while previous_num_replacement_texts < current_num_replacement_texts do
      -- NOTE: The loop bounds are evaluated just once, so the replacement texts appended below are only visited in the
      -- next iteration of the enclosing while loop, i.e. at the next nesting depth.
      for replacement_text_number = previous_num_replacement_texts + 1, current_num_replacement_texts do
        local replacement_text_tokens = replacement_texts.tokens[replacement_text_number]
        -- record the current nesting depth with the replacement text
        table.insert(replacement_texts.nesting_depth, current_nesting_depth)
        -- extract nested calls from the replacement text using syntactic analysis
        local nested_calls = get_calls(
          tokens,
          replacement_text_tokens.transformed_tokens,
          replacement_text_tokens.token_range,
          replacement_text_tokens.map_back,
          replacement_text_tokens.map_forward,
          issues,
          groupings,
          content
        )
        table.insert(replacement_texts.calls, nested_calls)
        -- extract nested statements and replacement texts from the nested calls using semantic analysis
        local nested_statements, nested_replacement_text_tokens = record_statements_and_replacement_texts(
          tokens,
          replacement_text_tokens.transformed_tokens,
          nested_calls,
          replacement_text_tokens.map_back,
          replacement_text_tokens.map_forward
        )
        -- The nested replacement texts are appended right after all replacement texts recorded so far, which includes the
        -- ones that were appended earlier in this very pass. Therefore, the offset must be the current length of the list
        -- rather than the number of replacement texts at the beginning of the pass.
        local replacement_text_number_offset = #replacement_texts.tokens
        for _, nested_statement in ipairs(nested_statements) do
          if nested_statement.type == FUNCTION_DEFINITION
              and nested_statement.subtype == FUNCTION_DEFINITION_DIRECT
              and nested_statement.replacement_text_number ~= nil then
            -- make the reference to the replacement text absolute instead of relative
            nested_statement.replacement_text_number = nested_statement.replacement_text_number + replacement_text_number_offset
          end
        end
        table.insert(replacement_texts.statements, nested_statements)
        for _, nested_tokens in ipairs(nested_replacement_text_tokens) do
          table.insert(replacement_texts.tokens, nested_tokens)
        end
      end
      previous_num_replacement_texts = current_num_replacement_texts
      current_num_replacement_texts = #replacement_texts.tokens
      current_nesting_depth = current_nesting_depth + 1
    end

    -- All lists must have exactly one item per replacement text.
    assert(#replacement_texts.tokens == current_num_replacement_texts)
    assert(#replacement_texts.calls == current_num_replacement_texts)
    assert(#replacement_texts.statements == current_num_replacement_texts)
    assert(#replacement_texts.nesting_depth == current_num_replacement_texts)

    return statements, replacement_texts
  end

  -- Run the statement extraction separately for every part and keep the results indexed by the part number.
  local statements = {}
  local replacement_texts = {}
  for part_number, part_calls in ipairs(results.calls) do
    local part_statements, part_replacement_texts
      = get_statements(results.tokens[part_number], results.groupings[part_number], part_calls)
    statements[part_number] = part_statements
    replacement_texts[part_number] = part_replacement_texts
  end

  -- There must be one list of statements and one record of replacement texts per part.
  assert(#statements == #results.calls)
  assert(#statements == #replacement_texts)

  -- Report issues that are apparent after the semantic analysis.
  --- Flatten the top-level and nested tokens, calls, and statements of all parts into parallel lists of segments. Every
  --- token segment is a triple of the part tokens, the (possibly transformed) segment tokens, and a forward mapping.
  local token_segments, call_segments, statement_segments = {}, {}, {}
  for part_number, part_calls in ipairs(results.calls) do
    local part_tokens = results.tokens[part_number]
    local part_replacement_texts = replacement_texts[part_number]
    -- the top-level segment of the part, whose tokens are not transformed
    call_segments[#call_segments + 1] = part_calls
    statement_segments[#statement_segments + 1] = statements[part_number]
    token_segments[#token_segments + 1] = {part_tokens, part_tokens, identity}
    -- one nested segment for every replacement text of the part
    for replacement_text_number, nested_calls in ipairs(part_replacement_texts.calls) do
      local nested_tokens = part_replacement_texts.tokens[replacement_text_number]
      call_segments[#call_segments + 1] = nested_calls
      statement_segments[#statement_segments + 1] = part_replacement_texts.statements[replacement_text_number]
      token_segments[#token_segments + 1] = {part_tokens, nested_tokens.transformed_tokens, nested_tokens.map_forward}
    end
  end

  --- Make a pass over the segments, building up information.
  local defined_private_functions = {}  -- pairs of a csname and a byte range

  ---- Collect information about symbols that were definitely defined.
  local called_functions_and_variants = {}  -- pairs of a csname and a byte range
  local defined_private_function_variant_texts = {}  -- records with a number and a csname
  local defined_private_function_variant_pattern = parsers.fail  -- alternatives are added with "+"
  local defined_private_function_variant_byte_ranges = {}  -- indexed by the numbers of the records
  local variant_base_csnames = {}  -- pairs of a csname and a byte range
  local indirect_definition_base_csnames = {}  -- pairs of a csname and a byte range

  ---- Collect information about symbols that may have been defined.
  local maybe_defined_csname_texts = {}  -- a set keyed by csnames
  local maybe_defined_csname_pattern = parsers.fail  -- alternatives are added with "+"
  local maybe_used_csname_texts = {}  -- a set keyed by csnames
  local maybe_used_csname_pattern = parsers.fail  -- alternatives are added with "+"

  for segment_number, segment_statements in ipairs(statement_segments) do
    local segment_calls = call_segments[segment_number]
    local segment_tokens, segment_transformed_tokens, map_forward = table.unpack(token_segments[segment_number])

    -- Convert tokens from a range into a PEG pattern.
    --
    -- Simple tokens are matched literally, whereas every run of complex tokens is matched as a wildcard, which is shown
    -- as "*" in the transcript. Returns the pattern, the transcript, and the number of simple tokens in the range.
    --
    -- Repetitions in LPeg are possessive: A wildcard written as `any^0` would swallow the rest of the subject and any
    -- literal text after it could never match. Therefore, the pattern is assembled from right to left and every wildcard
    -- becomes a small grammar that tries the rest of the pattern at every successive position of the subject. As a
    -- consequence, the pattern is anchored at the end of the subject and only ever matches whole control sequence names.
    -- NOTE(review): This assumes that `parsers.any` matches a single character and that `parsers.eof` matches just at
    -- the end of the subject.
    local function extract_pattern_from_tokens(token_range)
      local WILDCARD = {}  -- a unique marker that can't be confused with any token payload
      local parts, transcripts, num_simple_tokens = {}, {}, 0
      local previous_token_was_simple = true
      for _, token in token_range:enumerate(segment_transformed_tokens, map_forward) do
        if is_token_simple(token) then  -- simple material
          table.insert(parts, token.payload)
          table.insert(transcripts, token.payload)
          num_simple_tokens = num_simple_tokens + 1
          previous_token_was_simple = true
        else  -- complex material
          if previous_token_was_simple then  -- collapse a run of complex tokens into a single wildcard
            table.insert(parts, WILDCARD)
            table.insert(transcripts, "*")
          end
          previous_token_was_simple = false
        end
      end
      -- assemble the pattern from the last part to the first one
      local pattern = parsers.eof
      for part_number = #parts, 1, -1 do
        local part = parts[part_number]
        if part == WILDCARD then
          -- either the rest of the pattern matches here, or skip one character and try again
          pattern = lpeg.P{ pattern + parsers.any * lpeg.V(1) }
        else
          pattern = lpeg.P(part) * pattern
        end
      end
      local transcript = table.concat(transcripts)
      return pattern, transcript, num_simple_tokens
    end

    -- Try and convert tokens from a range into a csname.
    --
    -- Returns a table with the fields "payload", "transcript", and "type". For simple material, the type is TEXT and the
    -- payload is the extracted text. For complex material, the type is PATTERN and the payload is a PEG pattern. Returns
    -- nil when the complex material contains fewer simple tokens than the option "min_simple_tokens_in_csname_pattern".
    local function extract_csname_from_tokens(token_range)
      local text = extract_text_from_tokens(token_range, segment_transformed_tokens, map_forward)
      if text ~= nil then  -- simple material
        return {payload = text, transcript = text, type = TEXT}
      end
      -- complex material
      local pattern, transcript, num_simple_tokens = extract_pattern_from_tokens(token_range)
      local min_simple_tokens = get_option("min_simple_tokens_in_csname_pattern", options, pathname)
      if num_simple_tokens < min_simple_tokens then  -- too few simple tokens, give up
        return nil
      end
      return {payload = pattern, transcript = transcript, type = PATTERN}
    end

    -- Process an argument and record control sequence name usage and definitions.
    local function process_argument_tokens(argument)
      local specifier, token_range = argument.specifier, argument.token_range

      -- For c- and v-type arguments, the tokens form a control sequence name, which we record as used.
      if specifier == "c" or specifier == "v" then
        local csname = extract_csname_from_tokens(token_range)
        if csname ~= nil then
          if csname.type == TEXT then
            maybe_used_csname_texts[csname.payload] = true
          elseif csname.type == PATTERN then
            maybe_used_csname_pattern = maybe_used_csname_pattern + csname.payload
          end
        end
      end

      -- Only N- and n-type arguments are scanned for control sequence tokens.
      if lpeg.match(parsers.N_or_n_type_argument_specifier, specifier) == nil then
        return
      end

      local num_tokens = #segment_transformed_tokens
      for token_number, token in token_range:enumerate(segment_transformed_tokens, map_forward) do
        if token.type == CONTROL_SEQUENCE then
          -- Every control sequence token counts as a use.
          maybe_used_csname_texts[token.payload] = true
          -- A control sequence right after a function-defining control sequence counts as a possible definition.
          local next_token_number = token_number + 1
          if next_token_number <= num_tokens then
            local next_token = segment_transformed_tokens[next_token_number]
            if next_token.type == CONTROL_SEQUENCE
                and lpeg.match(parsers.expl3_function_definition_csname, token.payload) ~= nil then
              maybe_defined_csname_texts[next_token.payload] = true
            end
          end
        end
      end
    end

    -- Make a pass over the statements of the segment and record the control sequence names that they define and use.
    for _, statement in ipairs(segment_statements) do
      -- Determine the byte range of the statement, so that issues can be reported against it.
      local token_range = statement.call_range:new_range_from_subranges(get_call_token_range(segment_calls), #segment_tokens)
      local byte_range = token_range:new_range_from_subranges(get_token_byte_range(segment_tokens), #content)
      -- Process a function variant definition.
      if statement.type == FUNCTION_VARIANT_DEFINITION then
        -- Record base control sequence names of variants, both as control sequence name usage and separately.
        table.insert(variant_base_csnames, {statement.base_csname, byte_range})
        maybe_used_csname_texts[statement.base_csname] = true
        -- Record control sequence name definitions.
        if statement.defined_csname.type == TEXT then
          maybe_defined_csname_texts[statement.defined_csname.payload] = true
        elseif statement.defined_csname.type == PATTERN then
          maybe_defined_csname_pattern = maybe_defined_csname_pattern + statement.defined_csname.payload
        else
          error('Unexpected csname type "' .. statement.defined_csname.type .. '"')
        end
        -- Record private function variant definitions.
        if statement.confidence == DEFINITELY and statement.is_private then
          table.insert(defined_private_function_variant_byte_ranges, byte_range)
          local defined_private_function_variant = {
            number = #defined_private_function_variant_byte_ranges,
            csname = statement.defined_csname
          }
          if statement.defined_csname.type == TEXT then
            table.insert(defined_private_function_variant_texts, defined_private_function_variant)
          elseif statement.defined_csname.type == PATTERN then
            -- Attach the record to the pattern with a constant capture, so that a successful match produces the record
            -- itself. Dividing the pattern by the record would create a query capture instead, which would look the
            -- matched text up in the record, find nothing, and make the match produce just a subject position.
            defined_private_function_variant_pattern = (
              defined_private_function_variant_pattern
              + lpeg.Cc(defined_private_function_variant)
              * statement.defined_csname.payload
            )
          else
            error('Unexpected csname type "' .. statement.defined_csname.type .. '"')
          end
        end
      -- Process a function definition.
      elseif statement.type == FUNCTION_DEFINITION then
        -- Record the base control sequences used in indirect function definitions.
        if statement.subtype == FUNCTION_DEFINITION_INDIRECT then
          maybe_used_csname_texts[statement.base_csname] = true
          table.insert(indirect_definition_base_csnames, {statement.base_csname, byte_range})
        end
        -- Record control sequence name usage and definitions.
        maybe_defined_csname_texts[statement.defined_csname] = true
        -- A replacement text without a number is not covered by any other segment, so its tokens are scanned here.
        if statement.subtype == FUNCTION_DEFINITION_DIRECT and statement.replacement_text_number == nil then
          process_argument_tokens(statement.replacement_text_argument)
        end
        -- Record private function definition.
        if statement.confidence == DEFINITELY and statement.is_private then
          table.insert(defined_private_functions, {statement.defined_csname, byte_range})
        end
      -- Process an unrecognized statement.
      elseif statement.type == OTHER_STATEMENT then
        -- Record control sequence name usage and definitions.
        for _, call in statement.call_range:enumerate(segment_calls) do
          maybe_used_csname_texts[call.csname] = true
          table.insert(called_functions_and_variants, {call.csname, byte_range})
          for _, argument in ipairs(call.arguments) do
            process_argument_tokens(argument)
          end
        end
      -- Process a block of unrecognized tokens.
      elseif statement.type == OTHER_TOKENS_SIMPLE or statement.type == OTHER_TOKENS_COMPLEX then
        -- Record control sequence name usage by scanning all control sequence tokens.
        for _, token in token_range:enumerate(segment_transformed_tokens, map_forward) do
          if token.type == CONTROL_SEQUENCE then
            maybe_used_csname_texts[token.payload] = true
          end
        end
      else
        error('Unexpected statement type "' .. statement.type .. '"')
      end
    end
  end

  -- Finalize PEG patterns, so that they only match whole control sequence names.
  local eof = parsers.eof
  maybe_defined_csname_pattern = maybe_defined_csname_pattern * eof
  maybe_used_csname_pattern = maybe_used_csname_pattern * eof
  defined_private_function_variant_pattern = defined_private_function_variant_pattern * eof

  --- Report issues apparent from the collected information.
  ---- Report unused private functions, i.e. those whose names match neither a used text nor the pattern of used names.
  for _, defined_private_function in ipairs(defined_private_functions) do
    local defined_csname, byte_range = defined_private_function[1], defined_private_function[2]
    local is_used = maybe_used_csname_texts[defined_csname]
      or lpeg.match(maybe_used_csname_pattern, defined_csname) ~= nil
    if not is_used then
      issues:add('w401', 'unused private function', byte_range)
    end
  end

  ---- Report unused private function variants.
  ----- Start by assuming that no variant is used.
  local used_private_function_variants = {}
  for private_function_variant_number = 1, #defined_private_function_variant_byte_ranges do
    used_private_function_variants[private_function_variant_number] = false
  end
  ----- Mark the variants with textual names that match either a used text or the pattern of used names.
  for _, defined_private_function_variant in ipairs(defined_private_function_variant_texts) do
    local csname = defined_private_function_variant.csname
    assert(csname.type == TEXT)
    local is_used = maybe_used_csname_texts[csname.payload]
      or lpeg.match(maybe_used_csname_pattern, csname.payload) ~= nil
    if is_used then
      used_private_function_variants[defined_private_function_variant.number] = true
    end
  end
  ----- Mark the variants with pattern names that match a used text.
  for maybe_used_csname, _ in pairs(maybe_used_csname_texts) do
    -- NOTE: We might also want to test whether "defined_private_function_variant_pattern" and "maybe_used_csname_pattern"
    -- overlap. However, intersection is undecidable for parsing expression languages (PELs). In theory, we could use
    -- regular expressions instead of PEG patterns, since intersection is decidable for regular languages. In practice,
    -- there are no Lua libraries that would implement the required algorithms. Therefore, it seems more practical to
    -- just accept that low-confidence function variant definitions and function uses don't interact, not just because
    -- of the technical difficulty but also because the combined confidence is just too low.
    local matched_variant = lpeg.match(defined_private_function_variant_pattern, maybe_used_csname)
    if matched_variant ~= nil then
      assert(matched_variant.csname.type == PATTERN)
      used_private_function_variants[matched_variant.number] = true
    end
  end
  ----- Report the variants that were never marked.
  for private_function_variant_number, byte_range in ipairs(defined_private_function_variant_byte_ranges) do
    if not used_private_function_variants[private_function_variant_number] then
      issues:add('w402', 'unused private function variant', byte_range)
    end
  end

  local imported_prefixes = get_option('imported_prefixes', options, pathname)
  local expl3_well_known_function_csname = parsers.expl3_well_known_function_csname(imported_prefixes)

  -- Check whether a control sequence name is well-known or may have been defined in the file.
  local function is_known_csname(csname)
    if lpeg.match(expl3_well_known_function_csname, csname) ~= nil then
      return true
    end
    if maybe_defined_csname_texts[csname] then
      return true
    end
    return lpeg.match(maybe_defined_csname_pattern, csname) ~= nil
  end

  -- Check whether a control sequence name looks like an expl3 function.
  local function is_expl3like_csname(csname)
    return lpeg.match(parsers.expl3like_function_csname, csname) ~= nil
  end

  ---- Report function variants for undefined functions.
  for _, variant_base_csname in ipairs(variant_base_csnames) do
    local base_csname, byte_range = variant_base_csname[1], variant_base_csname[2]
    if not is_known_csname(base_csname) then
      issues:add('e405', 'function variant for an undefined function', byte_range)
    end
  end

  ---- Report calls to undefined functions and function variants.
  for _, called_function_or_variant in ipairs(called_functions_and_variants) do
    local csname, byte_range = called_function_or_variant[1], called_function_or_variant[2]
    if is_expl3like_csname(csname) and not is_known_csname(csname) then
      issues:add('e408', 'calling an undefined function', byte_range)
    end
  end

  ---- Report indirect function definitions from undefined base functions.
  for _, indirect_definition_base_csname in ipairs(indirect_definition_base_csnames) do
    local csname, byte_range = indirect_definition_base_csname[1], indirect_definition_base_csname[2]
    if is_expl3like_csname(csname) and not is_known_csname(csname) then
      issues:add('e411', 'indirect function definition from an undefined function', byte_range)
    end
  end

  -- Store the intermediate results of the analysis.
  results.statements = statements
  results.replacement_texts = replacement_texts
end

-- The public interface of the module: the entry point of the semantic analysis and the enumerations that describe
-- its results.
local exports = {
  process = semantic_analysis,
  csname_types = csname_types,
  statement_types = statement_types,
  statement_subtypes = statement_subtypes,
  statement_confidences = statement_confidences,
}

return exports