# texindex.awk, generated by jrtangle from ti.twjr.
#
# Copyright 2014-2023 Free Software Foundation, Inc.
# 
# This file is part of GNU Texinfo.
# 
# Texinfo is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# 
# Texinfo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
# ftrans.awk --- handle data file transitions
#
# user supplies beginfile() and endfile() functions
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# November 1992

FNR == 1 {
	if (_filename_ != "")
		endfile(_filename_)
	_filename_ = FILENAME
	beginfile(FILENAME)
}

END { endfile(_filename_) }
# join.awk --- join an array into a string
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993

function join(array, start, end, sep,    result, i)
{
	if (sep == "")
		sep = " "
	else if (sep == SUBSEP) # magic value
		sep = ""
	result = array[start]
	for (i = start + 1; i <= end; i++)
		result = result sep array[i]
	return result
}
# quicksort --- C.A.R. Hoare's quick sort algorithm.  See Wikipedia
#               or almost any algorithms or computer science text
# Adapted from K&R-II, page 110
#
function quicksort(data, left, right, compare,	# parameters
					i, last, use_index, lt)		# locals
{
	if (left >= right)  # do nothing if array contains fewer
		return          # than two elements

	use_index = (compare == "index")

	quicksort_swap(data, left, int((left + right) / 2))
	last = left
	for (i = left + 1; i <= right; i++) {
		lt = (use_index \
				? less_than(data, i, left) \
				: key_compare(data[i], data[left]) < 0)
		if (lt)
			quicksort_swap(data, ++last, i)
	}
	quicksort_swap(data, left, last)
	quicksort(data, left, last - 1, compare)
	quicksort(data, last + 1, right, compare)
}

# quicksort_swap --- quicksort helper function, could be inline
#
function quicksort_swap(data, i, j, temp)
{
	temp = data[i]
	data[i] = data[j]
	data[j] = temp
}
function del_array(a)
{
	# Portable and faster than
	#   for (i in a)
	#     delete a[i]
	split("", a)
}
function check_split_null(    n, a)
{
	n = split("abcde", a, "")
	return (n == 5)
}
function char_split(string, array,    n, i)
{
	if (Can_split_null)
		return split(string, array, "")

	# do it the hard way
	del_array(array)
	n = length(string)
	for (i = 1; i <= n; i++)
		array[i] = substr(string, i, 1)

	return n
}
function fatal(format, arg1, arg2, arg3, arg4, arg5,
				arg6, arg7, arg8, arg9, arg10, cat)
{
	cat = "cat 1>&2"	# maximal portability
	printf(format, arg1, arg2, arg3, arg4, arg5,
		  arg6, arg7, arg8, arg9, arg10) | cat
	close(cat)

	exit EXIT_FAILURE
}
function isupper(c)
{
	return index("ABCDEFGHIJKLMNOPQRSTUVWXYZ", c)
}

function islower(c)
{
	return index("abcdefghijklmnopqrstuvwxyz", c)
}

function isalpha(c)
{
	return islower(c) || isupper(c)
}

function isdigit(c)
{
	return index("0123456789", c)
}
function make_regexp(regexp,	a, sep, n)
{
	n = split(regexp, a, "%")
	if (Command_char == "\\")
		sep = Command_char Command_char
	else
		sep = Command_char

	return join(a, 1, n, sep)
}
function escape(regexp,		a, n)
{
	if (Command_char != "\\")
		return regexp

	n = split(regexp, a, "\\")
	if (n == 1)
		return regexp

	return join(a, 1, n, "\\\\")
}
function min(a, b)
{
	return (a < b ? a : b)
}
function usage(exit_val)
{
	printf(_"Usage: %s [OPTION]... FILE...\n", Invocation_name)
	print _"Generate a sorted index for each TeX output FILE."
	print _"Usually FILE... is specified as `foo.??' for a document `foo.texi'."
	print ""
	print _"Options:"
	print _" -h, --help   display this help and exit"
	print _" --version    display version information and exit"
	print _" --           end option processing"
	print ""
	print _"Email bug reports to bug-texinfo@gnu.org,"
	print _"general questions and discussion to help-texinfo@gnu.org."
	print _"Texinfo home page: https://www.gnu.org/software/texinfo/"

	exit exit_val
}

function version()
{
	print "texindex (GNU texinfo)", Texindex_version
	print ""
	printf _"Copyright (C) %s Free Software Foundation, Inc.\n", "2023"
	print _"License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>"
	print _"This is free software: you are free to change and redistribute it."
	print _"There is NO WARRANTY, to the extent permitted by law."

	exit EXIT_SUCCESS
}

BEGIN {
	TRUE = 1
	FALSE = 0
	EXIT_SUCCESS = 0
	EXIT_FAILURE = 1
	
	Texindex_version = "7.1"
	if (! Invocation_name) {
		# provide fallback in case it's not passed in.
		Invocation_name = "texindex"
	}
	
	Can_split_null = check_split_null()
	TEXTDOMAIN = "texinfo"
	for (i = 1; i < ARGC; i++) {
		if (ARGV[i] == "-h" || ARGV[i] == "--help") {
			usage(EXIT_SUCCESS)
		} else if (ARGV[i] == "--version") {
			version()
		} else if (ARGV[i] == "-k" || ARGV[i] == "--keep") {
			# do nothing, backwards compatibility
			delete ARGV[i]
		} else if (ARGV[i] == "--") {
			delete ARGV[i]
			break
		} else if (ARGV[i] ~ /^--?.+/) {
			fatal(_"%s: unrecognized option `%s'\n" \
					"Try `%s --help' for more information.\n",
					Invocation_name, ARGV[i], Invocation_name)
			# fatal() will do `exit EXIT_FAILURE'
		} else {
			break
		}
	}
}

function beginfile(filename)
{
	Output_file = filename "s"

	# Reinitialize these for each input file
	del_array(Seen)
	del_array(Keys)
	del_array(Allkeys)
	del_array(Subkeys)
	del_array(Initials)
	del_array(Numfields)
	del_array(Primary)
	del_array(Secondary)
	del_array(Tertiary)
	del_array(See)
	del_array(See_count)
	del_array(Seealso)
	del_array(Seealso_count)
	del_array(Pagedata)
	del_array(Printed)
	Entries = 0
	Do_initials = FALSE
	Prev_initial = ""

	Command_char = substr($0, 1, 1)
	if ((Command_char != "\\" && Command_char != "@") \
		|| substr($0, 2, 5) != "entry")
		fatal(_"%s is not a Texinfo index file\n", filename)

	Special_chars = "{}" Command_char

	Seeentry_re = make_regexp("%seeentry")
	Seealso_re = make_regexp("%seealso")
	Subentry_re = make_regexp(" *%subentry +")
}
function endfile(filename,		i, prev_initial, initial)
{
	# sort the entries
	quicksort(Keys, 1, Entries, "index")

	prev_initial = ""
	for (i = 1; i <= Entries; i++) {
		# deal with initial
		initial = Initials[Keys[i]]
		if (initial != prev_initial) {
			prev_initial = initial
			print_initial(initial)
		}

		write_index_entry(i)
	}
	close(Output_file)
}
{
	# Remove duplicates, which can happen
	if ($0 in Seen)
		next
	Seen[$0] = TRUE
	$0 = substr($0, 7)  # remove leading \entry or @entry
	initial = extract_initial($0)
	numfields = field_split($0, fields, "{", "}", Command_char)
	if (numfields < 3 || numfields > 5)
		fatal(_"%s:%d: Bad entry; expected 3 to 5 fields, not %d\n",
			FILENAME, FNR, numfields)
	key = fields[1]
	pagenum = fields[2]
	primary_text = fields[3]
	secondary_text = (numfields > 3 ? fields[4] : "")
	tertiary_text = (numfields > 4 ? fields[5] : "")
	if (! (key in Allkeys)) {
		# first time we've seen this full line
		Keys[++Entries] = key
		Allkeys[key] = key
		Initials[key] = initial
		Numfields[key] = numfields - 2	# don't count sortkey, page number
		Primary[key] = primary_text
		if (secondary_text)
			Secondary[key] = secondary_text
		if (tertiary_text)
			Tertiary[key] = tertiary_text
		n = split(key, subparts, Subentry_re)
		for (i = 1; i <= n; i++)
			Subkeys[key, i] = subparts[i]
	
		if (pagenum ~ Seeentry_re) {
			See_count[key]++
			See[key, See_count[key]] = pagenum
		} else if (pagenum ~ Seealso_re) {
			Seealso_count[key]++
			Seealso[key, Seealso_count[key]] = pagenum
		} else
			Pagedata[key] = pagenum
	} else {
		# We've seen this key before:
		# Add to see or see also, or else add to list of pages.
		# In the latter case, make sure we've not seen this
		# page number before.  (Shouldn't happen based on the
		# earlier removal of exact duplicates, but we could have
		# an identical key with different formatting of actual text.
	
		if (pagenum ~ Seeentry_re) {
			See_count[key]++
			See[key, See_count[key]] = pagenum
		} else if (pagenum ~ Seealso_re) {
			Seealso_count[key]++
			Seealso[key, Seealso_count[key]] = pagenum
		} else if (! (key in Pagedata)) {
			Pagedata[key] = pagenum
		} else if (Pagedata[key] != pagenum \
					&& Pagedata[key] !~  escape(", " pagenum "$")) {
			Pagedata[key] = Pagedata[key] ", " pagenum
		}
	}
	if (! Do_initials) {
		if (Prev_initial == "")
			Prev_initial = initial
		else if (initial != Prev_initial)
			Do_initials = TRUE
	}
}
function extract_initial(key,  initial, nextchar, i, l, kchars)
{
	l = char_split(key, kchars)
	if (l >= 3 && kchars[2] == "{") {
		bracecount = 1
		i = 3
		while (bracecount > 0 && i <= l) {
			if (kchars[i] == "{")
				bracecount++
			else if (kchars[i] == "}")
				bracecount--
			i++
		}
		if (i > l)
			fatal(_"%s:%d: Bad key %s in record\n", FILENAME, FNR, key)
		initial = substr(key, 2, i - 2)
	} else if (kchars[2] == Command_char) {
		nextchar = kchars[3]
		if (initial == Command_char && index("{}", nextchar) > 0)
			initial = substr(key, 2, 3)
		else {
			initial = toupper(nextchar)
		}
	} else {
		initial = toupper(kchars[2])
	}

	return initial
}
function field_split( \
	record, fields, start, end, com_ch,      # parameters
	chars, numchars, out, delim_count, i, j, k) # locals
{
	del_array(fields)

	numchars = char_split(record, chars)
	j = 1  # index into fields
	k = 1  # index into out
	delim_count = 1
	for (i = 2; i <= numchars; i++) {
		if (chars[i] == com_ch) {
			if (chars[i+1] == Command_char) {	# input was @@
				out[k++] = chars[i+1]
				out[k++] = chars[i+1]
				i++
			} else if (index(Special_chars, chars[i+1]) != 0) {
				out[k++] = chars[i+1]
				i++
			} else
				out[k++] = chars[i]
		} else if (chars[i] == start) {
			delim_count++
			out[k++] = chars[i]
		} else if (chars[i] == end) {
			delim_count--

			if (delim_count == 0) {
				fields[j++] = join(out, 1, k-1, SUBSEP)
				del_array(out)  # reset for next time through
				k = 1
				
				i++
				if (i <= numchars && chars[i] != start)
					fatal(_"%s:%d: Bad entry; expected %s at column %d\n",
						FILENAME, FNR, start, i)
				delim_count = 1
			} else
				out[k++] = chars[i]
		} else
			out[k++] = chars[i]
	}

	return j - 1  # num fields
}
function print_initial(initial)
{
	if (! Do_initials)
		return

	if (index(Special_chars, initial) != 0)
		initial = Command_char initial
	printf("%cinitial {%s}\n",
		Command_char, initial) > Output_file
}
function less_than(data, l, r,		left, right, nfields, cmp1, cmp2)
{
	left = data[l]
	right = data[r]

	left_fields = Numfields[left]
	right_fields = Numfields[right]

	nfields = min(left_fields, right_fields)

	# At least one field, always check the first subkey
	cmp1 = key_compare(Subkeys[left, 1], Subkeys[right, 1])
	if (cmp1 != 0)
		return cmp1 < 0

	# cmp1 == 0: one side has 1 field, other side has 1 to 3 fields
	if (nfields == 1)
		return left_fields < right_fields

	# At least two fields, check second subkey
	cmp2 = key_compare(Subkeys[left, 2], Subkeys[right, 2])
	if (cmp2 != 0)
		return cmp2 < 0

	# cmp1 == 0, cmp2 == 0, one side has 2 fields,
	# other has 2 to 3 fields
	if (nfields == 2)
		return left_fields < right_fields

	# Three fields
	return key_compare(Subkeys[left, 3], Subkeys[right, 3]) < 0
}
BEGIN {
	for (i = 0; i < 256; i++) {
		c = sprintf("%c", i)
		Ordval[c] = i  # map character to value

		if (isdigit(c))
			Ordval[c] += 512
	}

	# Set things up such that 'a' < 'A' < 'b' < 'B' < ...
	i = Ordval["a"]
	j = Ordval["z"]
	newval = i + 512
	for (; i <= j; i++) {
		c = sprintf("%c", i)

		if (islower(c)) {
			Ordval[c] = newval++
			Ordval[toupper(c)] = newval++
		}
	}
}
function key_compare(left, right,	len_l, len_r, len, chars_l, chars_r)
{
	len_l = length(left)
	len_r = length(right)
	len = (len_l < len_r ? len_l : len_r)

	char_split(left, chars_l)
	char_split(right, chars_r)

	for (i = 1; i <= len; i++) {
		if (isalpha(chars_l[i]) && isalpha(chars_r[i])) {
			# same char different case
			# upper case comes out last
			if (chars_l[i] != chars_r[i] &&
				tolower(chars_l[i]) == tolower(chars_r[i])) {
				if (i != len \
					&& (isalpha(chars_l[i+1]) || isalpha(chars_r[i+1])))
					continue

				# negative, zero, or positive
				return Ordval[chars_l[i]] - Ordval[chars_r[i]]
			}
			# same case, different char,
			# or different case, different char:
			# letter order wins
			if (Ordval[chars_l[i]] < Ordval[chars_r[i]])
				return -1

			if (Ordval[chars_l[i]] > Ordval[chars_r[i]])
				return 1

			# equal, keep going
			continue
		}

		# letter and something else, or two non-letters
		# letter order wins
		if (Ordval[chars_l[i]] < Ordval[chars_r[i]])
			return -1

		if (Ordval[chars_l[i]] > Ordval[chars_r[i]])
			return 1

		# equal, keep going
	}

	# equal so far, shorter one wins
	if (len_l < len_r)
		return -1

	if (len_l > len_r)
		return 1

	return 0
}
function print_entry(key, entry_command, entry_text,
					count, see_entries, i)
{
	if ((key, 1) in See)		# at least one ``see''
		print_see_entry(key, entry_command, entry_text,
						See_count, See)

	if (key in Pagedata) {		# at least one page number
		if ((key, 1) in Seealso) {
			count = Seealso_count[key]
			
			# Copy the entries to a separate array
			for (i = 1; i <= count; i++)
				see_entries[i] = Seealso[key, i]
			
			# sort them
			quicksort(see_entries, 1, count, "string")
		
			# now add them to the page data
			for (i = 1; i <= count; i++)
				Pagedata[key] = Pagedata[key] ", " see_entries[i]
		}

		printf("%c%s{%s}{%s}\n",
			Command_char, entry_command,
			entry_text[key], Pagedata[key]) > Output_file

		Printed[key] = True		# mark this key as printed
	} else if ((key, 1) in Seealso) {	# at least one ``see also''
		# Only ``see also'' entry, print it
		count = Seealso_count[key]
		
		# Copy the entries to a separate array
		for (i = 1; i <= count; i++)
			see_entries[i] = Seealso[key, i]
		
		# sort them
		quicksort(see_entries, 1, count, "string")

		printf("%c%s{%s}{",
			Command_char, entry_command,
			entry_text[key]) > Output_file

		# now add them to the page data
		for (i = 1; i <= count; i++) {
			printf("%s", see_entries[i]) > Output_file
			if (i != count)
				printf(", ") > Output_file
		}
		printf("}\n") > Output_file
	}
}
function print_see_entry(key, entry_command, entry_text,	# parameters
						count_array, see_text_array,		# parameters
						i, count, see_entries)				# locals
{
	count = count_array[key]
	if (count == 1) {	# the easy case
		printf("%c%s{%s, %s}{}\n",
			Command_char, entry_command,
			entry_text[key], see_text_array[key, 1]) > Output_file

		return
	}

	# Otherwise, we need to sort the entries and then print them
	# Copy the entries to a separate array
	for (i = 1; i <= count; i++)
		see_entries[i] = see_text_array[key, i]

	# sort them
	quicksort(see_entries, 1, count, "string")

	# now print them
	for (i = 1; i <= count; i++)
		printf("%c%s{%s, %s}{}\n",
			Command_char, entry_command,
			entry_text[key], see_entries[i]) > Output_file
}
function write_index_entry(current,		key)
{
	key = Keys[current]		# current sort key
	if (Numfields[key] == 1) {
		print_entry(key, "entry", Primary)
	} else if (Numfields[key] == 2) {
		if (! (Subkeys[key, 1] in Printed)) {
			printf("%centry{%s,}{}\n",
				Command_char, Primary[key]) > Output_file
			Printed[Subkeys[key, 1]] = True
		}
		print_entry(key, "secondary", Secondary)
	} else if (Numfields[key] == 3) {
		if (! (Subkeys[key, 1] in Printed)) {
			printf("%centry{%s,}{}\n",
				Command_char, Primary[key]) > Output_file
			Printed[Subkeys[key, 1]] = True
		}
		subkey = (Subkeys[key, 1] Command_char "subentry " Subkeys[key, 2])
		if (! (subkey in Printed)) {
			printf("%csecondary{%s,}{}\n",
				Command_char, Secondary[key]) > Output_file
			Printed[subkey] = True
		}
		print_entry(key, "tertiary", Tertiary)
	}
}