# Common.pm: definition of commands. Common code of other Texinfo modules. # # Copyright 2010-2023 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, # or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Original author: Patrice Dumas # Parts (also from Patrice Dumas) come from texi2html.pl or texi2html.init. package Texinfo::Common; use strict; # To check if there is no erroneous autovivification #no autovivification qw(fetch delete exists store strict); # for unicode/layer support in binmode # for binmode documented as pushing :utf8 on top of :encoding use 5.008001; # to determine the null file use Config; use File::Spec; # for find_encoding, resolve_alias use Encode; # debugging use Carp qw(cluck confess); # uncomment to check that settable commands are contained in global commands #use List::Compare; use Locale::Messages; use Texinfo::Documentlanguages; use Texinfo::Commands; require Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); @ISA = qw(Exporter); %EXPORT_TAGS = ( 'all' => [ qw( collect_commands_in_tree collect_commands_list_in_tree move_index_entries_after_items_in_tree relate_index_entries_to_table_items_in_tree protect_colon_in_tree protect_comma_in_tree protect_first_parenthesis protect_node_after_label_in_tree valid_customization_option valid_tree_transformation ) ] ); @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); # This is where the Texinfo modules get access to __( without explicit # import. @EXPORT = qw( __ __p ); $VERSION = '7.1'; # i18n # For the messages translations. my $messages_textdomain = 'texinfo'; sub __($) { my $msgid = shift; return Locale::Messages::dgettext($messages_textdomain, $msgid); } sub __p($$) { my $context = shift; my $msgid = shift; return Locale::Messages::dpgettext($messages_textdomain, $context, $msgid); } # For the in-document output strings translations in the texinfo_document # textdomain. # # Return the string but do nothing else, used to mark strings to be translated # in the Gettext framework. # Can be used in other modules, in particular if the modules already overwrite # gdt from Texinfo::Translations. sub gdt($) { return $_[0]; } # First argument is the translation context. sub pgdt($$) { return $_[1]; } # to be used when the context needs to be returned too sub pgdt_context($$) { return [$_[0], $_[1]]; } # determine the null devices my $default_null_device = File::Spec->devnull(); our %null_device_file = ( $default_null_device => 1 ); # special case, djgpp recognizes both null devices if ($Config{osname} eq 'dos' and $Config{osvers} eq 'djgpp') { $null_device_file{'/dev/null'} = 1; $null_device_file{'NUL'} = 1; } # Customization options # variables not specific of Parser, used in other contexts. Spread over # the different categories set below. The default values are in general # the same as elsewhere, but occasionally may be specific of the Parser. my %default_parser_common_customization = ( 'INCLUDE_DIRECTORIES' => [ '.' ], 'documentlanguage' => undef, # not 'en' as it is better to specify that there is no # need for translation since the strings are in english # rather than ask for translations to en 'EXPANDED_FORMATS' => [], 'DEBUG' => 0, # if >= 10, tree is printed in texi2any.pl after parsing. # If >= 100 tree is printed every line. 'FORMAT_MENU' => 'menu', # if not 'menu' no menu error related. # next three related to file names encoding 'DOC_ENCODING_FOR_INPUT_FILE_NAME' => 1, # use document encoding for input file # names encoding if set 'COMMAND_LINE_ENCODING' => undef, # encoding of command line strings # used to decode file names for error message 'INPUT_FILE_NAME_ENCODING' => undef, # used for input file encoding ); # Customization variables obeyed only by the parser, and the default values. my %default_parser_specific_customization = ( 'IGNORE_SPACE_AFTER_BRACED_COMMAND_NAME' => 1, 'CPP_LINE_DIRECTIVES' => 1, # handle cpp like synchronization lines 'MAX_MACRO_CALL_NESTING' => 100000, # max number of nested macro calls ); # this serves both to set defaults and list customization variable # valid for the parser. # also used in util/txicustomvars our %default_parser_customization_values = (%default_parser_common_customization, %default_parser_specific_customization); # @-commands that can be used multiple time in a document and default # values. Associated with customization values too. # also used in util/txicustomvars our %document_settable_multiple_at_commands = ( 'allowcodebreaks' => 'true', 'clickstyle' => '@arrow', 'codequotebacktick' => 'off', 'codequoteundirected' => 'off', 'contents' => 0, 'deftypefnnewline' => 'off', 'documentencoding' => 'utf-8', 'documentlanguage' => undef, # Documented as en, but no @documentlanguage # really means that the language is not set. # --document-language 'evenfooting' => undef, 'evenheading' => undef, 'everyfooting' => undef, 'everyheading' => undef, # is N ems in TeX, 0.4 in. 'exampleindent' => 5, 'firstparagraphindent' => 'none', 'frenchspacing' => 'off', 'headings' => 'on', 'kbdinputstyle' => 'distinct', 'microtype' => undef, 'oddheading' => undef, 'oddfooting' => undef, 'paragraphindent' => 3, 'shortcontents' => 0, 'summarycontents' => 0, 'urefbreakstyle' => 'after', 'xrefautomaticsectiontitle' => 'off', ); # @-commands that should be unique. Associated with customization values too. # also used in util/txicustomvars our %document_settable_unique_at_commands = ( 'afivepaper' => undef, 'afourpaper' => undef, 'afourlatex' => undef, 'afourwide' => undef, 'bsixpaper' => undef, # when passed through a customization variable, documentdescription # should be already formatted for HTML. There is no default, # what is determined to be the title is used if not set. 'documentdescription' => undef, 'evenfootingmarks' => undef, 'evenheadingmarks' => undef, 'everyfootingmarks' => 'bottom', 'everyheadingmarks' => 'bottom', 'fonttextsize' => 11, 'footnotestyle' => 'end', # --footnote-style 'novalidate' => 0, # --no-validate 'oddfootingmarks' => undef, 'oddheadingmarks' => undef, 'pagesizes' => undef, 'setchapternewpage' => 'on', 'setfilename' => undef, 'smallbook' => undef, ); # check that settable commands are contained in global commands # from command_data.txt if (0) { #if (1) { my @global_unique_settable = keys(%document_settable_unique_at_commands); my @global_unique_commands = keys(%Texinfo::Commands::global_unique_commands); my $lcu = List::Compare->new(\@global_unique_settable, \@global_unique_commands); # only in the first list if (scalar($lcu->get_unique)) { warn 'BUG: Unique settable not global: '.join(',',$lcu->get_unique)."\n"; } if (scalar($lcu->get_complement)) { print STDERR "REMARK: global_uniq commands not settable: " .join(',', $lcu->get_complement)."\n"; } my @global_multi_settable = keys(%document_settable_multiple_at_commands); my @global_multi_commands = keys(%Texinfo::Commands::global_commands); my $lcm = List::Compare->new(\@global_multi_settable, \@global_multi_commands); if (scalar($lcm->get_unique)) { warn 'BUG: Multi settable not global: '.join(',',$lcm->get_unique)."\n"; } if (scalar($lcm->get_complement)) { print STDERR "REMARK: global commands not settable: " .join(',', $lcm->get_complement)."\n"; } } # a value corresponds to defaults that are the same for every output format # otherwise undef is used our %default_converter_command_line_options = ( 'SPLIT_SIZE' => 300000, # --split-size 'FILLCOLUMN' => 72, # --fill-column 'NUMBER_SECTIONS' => 1, # --number-sections 'NUMBER_FOOTNOTES' => 1, # --number-footnotes # only in HTML 'TRANSLITERATE_FILE_NAMES' => 1, # --transliterate-file-names 'SPLIT' => undef, # --split 'HEADERS' => 1, # --headers. Used to set diverse # customization options in main program. # Only directly used in HTML converter 'NODE_FILES' => undef, # --node-files. Depend on SPLIT 'VERBOSE' => undef, # --verbose 'OUTFILE' => undef, # --output If non split and not ending by /. # Setting can be format dependent 'SUBDIR' => undef, # --output If split or ending by /. # Setting can be format dependent 'ENABLE_ENCODING' => 1, # --disable-encoding/--enable-encoding. # The option is directly used in # Info/Plaintext, and used in diverse formats # for index sorting and plain text output. ); # used in main program, defaults documented in manual my %default_main_program_command_line_options = ( 'MACRO_EXPAND' => undef, # --macro-expand. Only for main program # used in HTML only, called from main program 'INTERNAL_LINKS' => undef, # --internal-links 'ERROR_LIMIT' => 100, # --error-limit 'FORCE' => undef, # --force 'NO_WARN' => undef, # --no-warn 'SILENT' => undef, # --silent. Not used. For completeness # following also set in converters 'FORMAT_MENU' => 'menu', # --headers. Modified by the format. ); # used in main program, defaults documented in manual # also used in util/txicustomvars our %default_main_program_customization = ( 'CHECK_NORMAL_MENU_STRUCTURE' => 0, # output warnings when node with # automatic direction and directions in menu are not consistent # with sectionning, and when node directions are not consistent # with menu directions. 'CHECK_MISSING_MENU_ENTRY' => 1, 'DUMP_TREE' => undef, 'DUMP_TEXI' => undef, 'SHOW_BUILTIN_CSS_RULES' => 0, 'SORT_ELEMENT_COUNT' => undef, 'SORT_ELEMENT_COUNT_WORDS' => undef, 'TEXI2DVI' => 'texi2dvi', 'TREE_TRANSFORMATIONS' => undef, ); # defaults for the main program. In general transmitted to converters as defaults our %default_main_program_customization_options = ( %default_main_program_command_line_options, %default_main_program_customization); # used in converters, default documented in manual # also used in util/txicustomvars our %default_converter_customization = ( 'TOP_NODE_UP' => '(dir)', # up node of Top node default value 'BASEFILENAME_LENGTH' => 255 - 10, 'DOC_ENCODING_FOR_INPUT_FILE_NAME' => 1, 'DOC_ENCODING_FOR_OUTPUT_FILE_NAME' => 0, # only used in HTML 'IMAGE_LINK_PREFIX' => undef, 'CASE_INSENSITIVE_FILENAMES' => 0, 'DEBUG' => 0, # only used in HTML 'HANDLER_FATAL_ERROR_LEVEL' => 100, 'TEST' => 0, 'TEXTCONTENT_COMMENT' => undef, # in textcontent format # used in TexinfoXML/SXML # Reset by the main program, therefore this value is only used in converter # tests that use the perl modules directly. Does not need to match with the # documented value used in the main program, nor to be updated every time a # DTD is released, to have a fixed value for the tests. However, it should # be good to update from time to time to avoid test results that are not # valid against their reported DTD. 'TEXINFO_DTD_VERSION' => '7.1', ); # Some are for all converters, EXTENSION for instance, some for # some converters, for example CLOSE_QUOTE_SYMBOL and many # for HTML. Could be added to %default_converter_customization. # Defaults are documented in manual and set in the various converters. # used in util/txicustomvars our @variable_string_settables = ( 'AFTER_BODY_OPEN', 'AFTER_SHORT_TOC_LINES', 'AFTER_TOC_LINES', 'ASCII_DASHES_AND_QUOTES', 'ASCII_GLYPH', 'ASCII_PUNCTUATION', 'AUTO_MENU_DESCRIPTION_ALIGN_COLUMN', 'AUTO_MENU_MAX_WIDTH', 'AVOID_MENU_REDUNDANCY', 'BEFORE_SHORT_TOC_LINES', 'BEFORE_TOC_LINES', 'BIG_RULE', 'BODYTEXT', 'CLASS_BEGIN_USEPACKAGE', # for LaTeX 'COPIABLE_LINKS', 'CHAPTER_HEADER_LEVEL', 'CHECK_HTMLXREF', 'CLOSE_DOUBLE_QUOTE_SYMBOL', 'CLOSE_QUOTE_SYMBOL', 'COMMAND_LINE_ENCODING', 'COMPLEX_FORMAT_IN_TABLE', 'CONTENTS_OUTPUT_LOCATION', 'CONVERT_TO_LATEX_IN_MATH', 'DATE_IN_HEADER', 'DEFAULT_RULE', 'DEF_TABLE', 'DO_ABOUT', 'DOC_ENCODING_FOR_INPUT_FILE_NAME', 'DOC_ENCODING_FOR_OUTPUT_FILE_NAME', 'DOCTYPE', 'END_USEPACKAGE', # for LaTeX 'EPUB_CREATE_CONTAINER_FILE', # for ext/epub3.pm 'EPUB_KEEP_CONTAINER_FOLDER', # for ext/epub3.pm 'EXTENSION', 'EXTERNAL_CROSSREF_EXTENSION', 'EXTERNAL_CROSSREF_SPLIT', 'EXTERNAL_DIR', 'EXTRA_HEAD', 'FOOTNOTE_END_HEADER_LEVEL', 'FOOTNOTE_SEPARATE_HEADER_LEVEL', 'FRAMES', 'FRAMESET_DOCTYPE', 'HEADER_IN_TABLE', 'HIGHLIGHT_SYNTAX', # for ext/highlight_syntax.pm 'HIGHLIGHT_SYNTAX_DEFAULT_LANGUAGE', # for ext/highlight_syntax.pm 'HTML_MATH', 'HTML_ROOT_ELEMENT_ATTRIBUTES', 'HTMLXREF_FILE', 'HTMLXREF_MODE', 'ICONS', 'IMAGE_LINK_PREFIX', 'INDEX_ENTRY_COLON', 'INDEX_SPECIAL_CHARS_WARNING', 'INFO_JS_DIR', 'INFO_SPECIAL_CHARS_QUOTE', 'INFO_SPECIAL_CHARS_WARNING', 'IGNORE_REF_TO_TOP_NODE_UP', 'INLINE_CSS_STYLE', 'INPUT_FILE_NAME_ENCODING', 'JS_WEBLABELS', 'JS_WEBLABELS_FILE', 'LOCALE_ENCODING', 'L2H_CLEAN', 'L2H_FILE', 'L2H_HTML_VERSION', 'L2H_L2H', 'L2H_SKIP', 'L2H_TMP', 'MATHJAX_SCRIPT', 'MATHJAX_SOURCE', 'MAX_HEADER_LEVEL', 'MENU_ENTRY_COLON', 'MENU_SYMBOL', 'MESSAGE_ENCODING', 'MONOLITHIC', 'NO_CSS', 'NO_NUMBER_FOOTNOTE_SYMBOL', 'NO_CUSTOM_HTML_ATTRIBUTE', 'NODE_NAME_IN_INDEX', 'NODE_NAME_IN_MENU', 'NO_TOP_NODE_OUTPUT', 'NO_USE_SETFILENAME', 'OPEN_DOUBLE_QUOTE_SYMBOL', 'OPEN_QUOTE_SYMBOL', 'OUTPUT_CHARACTERS', 'OUTPUT_ENCODING_NAME', 'OUTPUT_FILE_NAME_ENCODING', 'OUTPUT_PERL_ENCODING', 'PACKAGE', 'PACKAGE_AND_VERSION', 'PACKAGE_NAME', 'PACKAGE_URL', 'PACKAGE_VERSION', 'PRE_BODY_CLOSE', 'PREFIX', 'PROGRAM', 'PROGRAM_NAME_IN_ABOUT', 'PROGRAM_NAME_IN_FOOTER', 'SECTION_NAME_IN_TITLE', 'SHORT_TOC_LINK_TO_TOC', 'SHOW_TITLE', 'SIMPLE_MENU', 'SORT_ELEMENT_COUNT', 'T4H_LATEX_CONVERSION', 'T4H_MATH_CONVERSION', 'T4H_TEX_CONVERSION', 'TEXI2HTML', 'TEXINFO_OUTPUT_FORMAT', 'TOC_LINKS', 'TOP_FILE', 'TOP_NODE_FILE_TARGET', 'TOP_NODE_UP_URL', 'USE_ACCESSKEY', 'USE_ISO', 'USE_LINKS', 'USE_NEXT_HEADING_FOR_LONE_NODE', 'USE_NODES', 'USE_NODE_DIRECTIONS', 'USE_NUMERIC_ENTITY', 'USE_REL_REV', 'USE_SETFILENAME_EXTENSION', 'USE_TITLEPAGE_FOR_TITLE', 'USE_UNIDECODE', 'USE_UP_NODE_FOR_ELEMENT_UP', 'USE_XML_SYNTAX', 'VERTICAL_HEAD_NAVIGATION', 'WORDS_IN_PAGE', 'XREF_USE_FLOAT_LABEL', 'XREF_USE_NODE_NAME_ARG', ); # Not strings. my @variable_other_settables = ( # Documented in the texi2any_api manual 'LINKS_BUTTONS', 'TOP_BUTTONS', 'SECTION_BUTTONS', 'CHAPTER_FOOTER_BUTTONS', 'SECTION_FOOTER_BUTTONS', 'NODE_FOOTER_BUTTONS', 'MISC_BUTTONS', 'CHAPTER_BUTTONS', 'ACTIVE_ICONS', 'PASSIVE_ICONS', # set from command line. # TODO not documented. 'CSS_FILES', # --css-include 'CSS_REFS', # --css-ref 'EXPANDED_FORMATS', # --if* 'INCLUDE_DIRECTORIES', # -I ); our %document_settable_at_commands = (%document_settable_multiple_at_commands, %document_settable_unique_at_commands); my %valid_customization_options; foreach my $var (keys(%document_settable_at_commands), keys(%default_main_program_command_line_options), keys(%default_converter_command_line_options), keys(%default_main_program_customization), keys(%default_parser_specific_customization), keys(%default_converter_customization), @variable_string_settables, @variable_other_settables) { $valid_customization_options{$var} = 1; } sub valid_customization_option($) { my $option = shift; return $valid_customization_options{$option}; } # not documented on purpose, should not be directly called in user-defined # codes sub add_valid_customization_option($) { my $option = shift; if ($option =~ /^[A-Z][A-Z_]{2,}$/) { $valid_customization_options{$option} = 1; return 1; } return 0; } # Output formats our %texinfo_output_formats; foreach my $output_format_command ('info', 'plaintext', grep {$Texinfo::Commands::block_commands{$_} eq 'format_raw'} keys(%Texinfo::Commands::block_commands)) { $texinfo_output_formats{$output_format_command} = $output_format_command; } # Tree transformations my %valid_tree_transformations; foreach my $valid_transformation ('simple_menus', 'fill_gaps_in_sectioning', 'move_index_entries_after_items', 'relate_index_entries_to_items', 'insert_nodes_for_sectioning_commands', 'complete_tree_nodes_menus', 'complete_tree_nodes_missing_menu', 'regenerate_master_menu', 'indent_menu_descriptions') { $valid_tree_transformations{$valid_transformation} = 1; } sub valid_tree_transformation ($) { my $transformation = shift; return 1 if (defined($transformation) and $valid_tree_transformations{$transformation}); return 0; } # information on encodings # in Texinfo up to 5.2, we presume that ISO-8859-1 was considered as # the default encoding (although it had never been said explicitly in # the manual, it is consistent with HTML output without encoding # being the default for makeinfo output in earlier versions and being, # at that time, considered as ISO-8859-1). The wording in the Texinfo # manual implied that setting US-ASCII had no effect, a possible # interpretation being that it was an alias for ISO-8859-1. Since # ISO-8859-1 extends US-ASCII in a compatible way, this interpretation # is valid. Also, as long as the same 8bit encoding is used for input and # output, the precise 8bit encoding used to extend US-ASCII has no # practical consequence, something consistent with past makeinfo supporting # any 8bit encoding without documentencoding and also when US-ASCII was # specified as encoding. # # To support old manuals in which US-ASCII can be specified although # the encoding corresponds to any 8bit encoding compatible with ISO-8859-1, # we convert US-ASCII as ISO-8859-1 to avoid errors for characters in # ISO-8859-1 but not in US-ASCII. our %encoding_name_conversion_map; %encoding_name_conversion_map = ( 'us-ascii' => 'iso-8859-1', # The mapping to utf-8 is important for perl code, as it means using a strict # conversion to utf-8 and not a lax conversion: # https://perldoc.perl.org/perlunifaq#What's-the-difference-between-UTF-8-and-utf8? # In more detail, we want to use utf-8 only for two different reasons # 1) if input is malformed it is better to error out it as soon as possible # 2) we do not want to have different behaviour and hard to find bugs # depending on whether the user used @documentencoding utf-8 # or @documentencoding utf8. There is a warning with utf8, but # we want to be clear in any case. 'utf8' => 'utf-8', ); # information on @-commands our %nobrace_symbol_text; %nobrace_symbol_text = ( '*', "\n", ' ', ' ', "\t", ' ', "\n", ' ', '-', '', # hyphenation hint '|', '', # used in formatting commands @evenfooting and friends '/', '', ':', '', '!', '!', '?', '?', '.', '.', '@', '@', '}', '}', '{', '{', '&', '&', '\\', '\\', # should only appear in math ); our %def_map = ( # basic commands. # 'arg' and 'argtype' are for everything appearing after the other # arguments. 'defline', [ 'category', 'name', 'arg' ], 'deftypeline', [ 'category', 'type', 'name', 'argtype' ], 'deffn', [ 'category', 'name', 'arg' ], 'defvr', [ 'category', 'name' ], 'deftypefn', [ 'category', 'type', 'name', 'argtype' ], 'deftypeop', [ 'category', 'class' , 'type', 'name', 'argtype' ], 'deftypevr', [ 'category', 'type', 'name' ], 'defcv', [ 'category', 'class' , 'name' ], 'deftypecv', [ 'category', 'class' , 'type', 'name' ], 'defop', [ 'category', 'class' , 'name', 'arg' ], 'deftp', [ 'category', 'name', 'argtype' ], # shortcuts # The strings are marked to be translated in the parsers with type # 'untranslated'. 'defun', {'deffn' => pgdt_context( 'category of functions for @defun', 'Function')}, # TRANSLATORS: category of macros for @defmac 'defmac', {'deffn' => gdt('Macro')}, # TRANSLATORS: category of special forms for @defspec 'defspec', {'deffn' => gdt('Special Form')}, 'defvar', {'defvr' => pgdt_context( 'category of variables for @defvar', 'Variable')}, # TRANSLATORS: category of user-modifiable options for @defopt 'defopt', {'defvr' => gdt('User Option')}, 'deftypefun', {'deftypefn' => pgdt_context( 'category of functions for @deftypefun', 'Function')}, 'deftypevar', {'deftypevr' => pgdt_context( 'category of variables in typed languages for @deftypevar', 'Variable')}, 'defivar', {'defcv' => pgdt_context( 'category of instance variables in object-oriented programming for @defivar', 'Instance Variable')}, 'deftypeivar', {'deftypecv' => pgdt_context( 'category of instance variables with data type in object-oriented programming for @deftypeivar', 'Instance Variable')}, 'defmethod', {'defop' => pgdt_context( 'category of methods in object-oriented programming for @defmethod', 'Method')}, 'deftypemethod', {'deftypeop' => pgdt_context( 'category of methods with data type in object-oriented programming for @deftypemethod', 'Method')}, ); # Argument not metasyntactic variables only. our %def_no_var_arg_commands; our %def_aliases; foreach my $def_command(keys %def_map) { if (ref($def_map{$def_command}) eq 'HASH') { my ($real_command) = keys (%{$def_map{$def_command}}); $def_aliases{$def_command} = $real_command; $def_aliases{$def_command.'x'} = $real_command.'x'; } $def_no_var_arg_commands{$def_command} = 1 if ($def_command =~ /^deftype/); } our %small_block_associated_command; for my $cmd ('example', 'display', 'format', 'lisp', 'quotation', 'indentedblock') { $small_block_associated_command{'small'.$cmd} = $cmd; }; # Section and heading commands hierarchical levels our %command_structuring_level = ( 'top' => 0, 'part' => 0, # out of the main hierarchy 'chapter' => 1, 'majorheading' => 1, # same as chapheading 'unnumbered' => 1, 'centerchap' => 1, # like unnumbered 'chapheading' => 1, 'appendix' => 1, 'section' => 2, 'unnumberedsec' => 2, 'heading' => 2, 'appendixsec' => 2, 'appendixsection' => 2, # same as appendixsec 'subsection' => 3, 'unnumberedsubsec' => 3, 'subheading', => 3, 'appendixsubsec' => 3, 'subsubsection' => 4, 'unnumberedsubsubsec' => 4, 'subsubheading' => 4, 'appendixsubsubsec' => 4, ); our %level_to_structuring_command; { my $sections = [ ]; my $appendices = [ ]; my $unnumbered = [ ]; my $headings = [ ]; # set levels for synonyms $level_to_structuring_command{'appendixsection'} = $appendices; $level_to_structuring_command{'majorheading'} = $headings; $level_to_structuring_command{'centerchap'} = $unnumbered; foreach my $command (keys (%command_structuring_level)) { next if defined($level_to_structuring_command{$command}); if ($command =~ /^appendix/) { $level_to_structuring_command{$command} = $appendices; } elsif ($command =~ /^unnumbered/ or $command eq 'top') { $level_to_structuring_command{$command} = $unnumbered; } elsif ($command =~ /section$/ or $command eq 'chapter') { $level_to_structuring_command{$command} = $sections; } else { # no mapping for part, it is outside of the main hierarchy next if ($command eq 'part'); $level_to_structuring_command{$command} = $headings; } my $command_level = $command_structuring_level{$command}; if (defined($level_to_structuring_command{$command}->[$command_level])) { die "$command: level_to_structuring_command already set to " .$level_to_structuring_command{$command}->[$command_level]."\n"; } $level_to_structuring_command{$command}->[$command_level] = $command; } } # %all_commands includes user-settable commands only. # The internal commands are not in %all_commands. # used in util/txicmdlist our %all_commands; foreach my $command ( keys(%Texinfo::Commands::block_commands), keys(%Texinfo::Commands::brace_commands), keys(%Texinfo::Commands::line_commands), keys(%Texinfo::Commands::nobrace_commands), ) { $all_commands{$command} = 1; } # brace commands that are not replaced with text. our %non_formatted_brace_commands; foreach my $non_formatted_brace_command ('anchor', 'caption', 'errormsg', 'hyphenation', 'shortcaption', 'sortas') { $non_formatted_brace_commands{$non_formatted_brace_command} = 1; } # functions for main program. Should not be called in user-defined code. # FIXME locate_init_file() is also called in HTML Converter for htmlxref files. # $FILE: file name to locate. It can be a file path. Binary string. # $DIRECTORIES: a reference on a array containing a list of directories to # search the file in. Binary strings. # $ALL_FILES: if true collect all the files with that name, otherwise stop # at first match. sub locate_init_file($$$) { my $file = shift; my $directories = shift; my $all_files = shift; if (File::Spec->file_name_is_absolute($file)) { return $file if (-e $file and -r $file); } else { my @files; foreach my $dir (@$directories) { next unless (-d $dir); my $possible_file = File::Spec->catfile($dir, $file); if ($all_files) { push (@files, $possible_file) if (-e $possible_file and -r $possible_file); } else { return $possible_file if (-e $possible_file and -r $possible_file); } } return @files if ($all_files); } return undef; } # API to open, set encoding and register files. # In general $SELF is stored as $converter->{'output_files'} # and should be accessed through $converter->output_files_information(); # TODO next three functions not documented anywhere, probably relevant to document # both in POD and in HTML Customization API. sub output_files_initialize { return {'unclosed_files' => {}, 'opened_files' => []}; } # # All the opened files are registered, except for stdout, # and the closing of files should be registered too with # output_files_register_closed() below. This makes possible to # unlink all the opened files and close the files not already # closed. # # $FILE_PATH is the file path, it should be a binary string. # If $USE_BINMODE is set, call binmode() to set binary mode. # $OUTPUT_ENCODING argument overrides the output encoding. # Returns # - the opened filehandle, or undef if opening failed, # - the $! error message or undef if opening succeeded. sub output_files_open_out($$$;$$) { my $self = shift; my $customization_information = shift; my $file_path = shift; my $use_binmode = shift; my $output_encoding = shift; #if (!defined($file_path)) { # cluck('output_files_open_out: file_path undef'); #} my $encoding; if (defined($output_encoding)) { $encoding = $output_encoding; } elsif (defined($customization_information->get_conf('OUTPUT_PERL_ENCODING'))) { $encoding = $customization_information->get_conf('OUTPUT_PERL_ENCODING'); } if ($file_path eq '-') { binmode(STDOUT) if $use_binmode; binmode(STDOUT, ":encoding($encoding)") if (defined($encoding)); if ($self) { $self->{'unclosed_files'}->{$file_path} = \*STDOUT; } return \*STDOUT, undef; } my $filehandle = do { local *FH }; if (!open ($filehandle, '>', $file_path)) { my $error_message = $!; return undef, $error_message; } # If $use_binmode is true, we run binmode to turn off outputting LF as CR LF # under MS-Windows, so that Info tag tables will have correct offsets. This # must be done before setting the encoding filters with binmode. binmode($filehandle) if $use_binmode; if ($encoding) { binmode($filehandle, ":encoding($encoding)"); } if ($self) { push @{$self->{'opened_files'}}, $file_path; $self->{'unclosed_files'}->{$file_path} = $filehandle; } return $filehandle, undef; } # see the description of $SELF in comment above output_files_open_out. # # $FILE_PATH is the file path, it should be a binary string. sub output_files_register_closed($$) { my $self = shift; my $file_path = shift; if ($self->{'unclosed_files'}->{$file_path}) { delete $self->{'unclosed_files'}->{$file_path}; } else { cluck "$file_path not opened\n"; } } # The next two functions should not be called from user-defined # code, only from the main program. They are defined here for # consistency of the API and clarity of the code. # # see the description of $SELF in comment above output_files_open_out. sub output_files_opened_files($) { my $self = shift; if (defined($self->{'opened_files'})) { return @{$self->{'opened_files'}}; } else { return (); } } # see the description of $SELF in comment above output_files_open_out. sub output_files_unclosed_files($) { my $self = shift; return $self->{'unclosed_files'}; } # end of output_files API # functions used in main program, Parser and/or Texinfo::Structuring. # Not supposed to be called in user-defined code. # Called both in NonXS and XS parsers sub rearrange_tree_beginning($$) { my $self = shift; my $before_node_section = shift; # Put everything before @setfilename in a special type. This allows to # ignore everything before @setfilename. if ($self->global_commands_information()->{'setfilename'} and $self->global_commands_information()->{'setfilename'}->{'parent'} eq $before_node_section) { my $before_setfilename = {'type' => 'preamble_before_setfilename', 'parent' => $before_node_section, 'contents' => []}; while (@{$before_node_section->{'contents'}} and (!$before_node_section->{'contents'}->[0]->{'cmdname'} or $before_node_section->{'contents'}->[0]->{'cmdname'} ne 'setfilename')) { my $content = shift @{$before_node_section->{'contents'}}; $content->{'parent'} = $before_setfilename; push @{$before_setfilename->{'contents'}}, $content; } unshift (@{$before_node_section->{'contents'}}, $before_setfilename) if (@{$before_setfilename->{'contents'}}); delete $before_node_section->{'contents'} if (scalar(@{$before_node_section->{'contents'}}) == 0); } _add_preamble_before_content($before_node_section); } sub _add_preamble_before_content($) { my $before_node_section = shift; # add a preamble for informational commands my $informational_preamble = {'type' => 'preamble_before_content', 'parent' => $before_node_section, 'contents' => []}; my @first_types; if ($before_node_section->{'contents'}) { while (@{$before_node_section->{'contents'}}) { my $next_content = $before_node_section->{'contents'}->[0]; if ($next_content->{'type'} and ($next_content->{'type'} eq 'preamble_before_beginning' or $next_content->{'type'} eq 'preamble_before_setfilename')) { push @first_types, shift @{$before_node_section->{'contents'}}; } elsif (($next_content->{'type'} and $next_content->{'type'} eq 'paragraph') or ($next_content->{'cmdname'} and not $Texinfo::Commands::preamble_commands{ $next_content->{'cmdname'}})) { last; } else { my $content = shift @{$before_node_section->{'contents'}}; $content->{'parent'} = $informational_preamble; push @{$informational_preamble->{'contents'}}, $content; } } } push @first_types, $informational_preamble; unshift (@{$before_node_section->{'contents'}}, @first_types); } sub get_perl_encoding($$$) { my $commands_info = shift; my $registrar = shift; my $configuration_information = shift; my $result; if (defined($commands_info->{'documentencoding'})) { foreach my $element (@{$commands_info->{'documentencoding'}}) { my $perl_encoding = element_associated_processing_encoding($element); if (!defined($perl_encoding)) { my $encoding = $element->{'extra'}->{'input_encoding_name'} if ($element->{'extra'}); if (defined($encoding)) { $registrar->line_warn($configuration_information, sprintf(__("unrecognized encoding name `%s'"), $encoding), $element->{'source_info'}); } } else { $result = $perl_encoding; } } } return $result; } # for Parser and main program sub warn_unknown_language($) { my $lang = shift; my @messages = (); my $lang_code = $lang; my $region_code; if ($lang =~ /^([a-z]+)_([A-Z]+)/) { $lang_code = $1; $region_code = $2; } if (! $Texinfo::Documentlanguages::language_codes{$lang_code}) { push @messages, sprintf(__("%s is not a valid language code"), $lang_code); } if (defined($region_code) and ! $Texinfo::Documentlanguages::region_codes{$region_code}) { push @messages, sprintf(__("%s is not a valid region code"), $region_code); } return @messages; } # next functions are for code used in Structuring in addition to Parser. # also possibly used in Texinfo::Transformations. sub _find_end_brace($$) { my $text = shift; my $braces_count = shift; my $before = ''; while ($braces_count > 0 and length($text)) { if ($text =~ s/([^()]*)([()])//) { $before .= $1.$2; my $brace = $2; if ($brace eq '(') { $braces_count++; } else { $braces_count--; if ($braces_count == 0) { return ($before, $text, 0); } } } else { $before .= $text; $text = ''; } } return ($before, undef, $braces_count); } # This only counts opening braces, and returns 0 once all the parentheses # are closed sub _count_opened_tree_braces($$); sub _count_opened_tree_braces($$) { my $current = shift; my $braces_count = shift; if (defined($current->{'text'})) { my ($before, $after); ($before, $after, $braces_count) = _find_end_brace($current->{'text'}, $braces_count); } return $braces_count; } # relocate $SOURCE_MARKS source marks with position between # $BEGIN_POSITION and $BEGIN_POSITION + $ADDED_LEN to be relative to # $BEGIN_POSITION, and move to element $E. # return $BEGIN_POSITION + $ADDED_LEN if there were source marks sub relocate_source_marks($$$$) { my $source_marks = shift; return undef if (!$source_marks); my $e = shift; my $begin_position = shift; my $added_len = shift; my $end_position = $begin_position + $added_len; my @indices_to_remove; # collect source marks to remove starting from the beginning to keep # the correct order in the $e element. Order indices to remove # in the reverse order to start from the last in order not to change # the array order when the entry is splice'd away. for (my $i = 0; $i < scalar(@$source_marks); $i++) { my $source_mark = $source_marks->[$i]; if (($begin_position == 0 and (!defined($source_marks->[$i]->{'position'}) # this should never happen or $source_marks->[$i]->{'position'} == 0)) or ($source_marks->[$i]->{'position'} > $begin_position and $source_marks->[$i]->{'position'} <= $end_position)) { unshift @indices_to_remove, $i; if ($source_mark->{'position'}) { $source_mark->{'position'} = $source_mark->{'position'} - $begin_position; } elsif ($begin_position) { warn "BUG: no $source_mark->{'position'} but $begin_position\n"; } $e->{'source_marks'} = [] if (! defined($e->{'source_marks'})); push @{$e->{'source_marks'}}, $source_mark; } elsif ($source_marks->[$i]->{'position'} > $end_position) { # only correct if positions are always monotonically increasing # but should be the case for now last; } } foreach my $i (@indices_to_remove) { splice (@$source_marks, $i, 1); } return $end_position; } # retrieve a leading manual name in parentheses, if there is one. # $LABEL_CONTENTS_CONTAINER->{'contents'} is the Texinfo for the specification # of a node. It is relevant in any situation when a label is expected, # @node, menu entry, float, anchor... For the @node command, for instance, # $LABEL_CONTENTS_CONTAINER is typically $node->{'args'}->[0]. # # Returned object is a hash with two fields: # # manual_content - Texinfo tree for a manual name extracted from the # node specification. # node_content - Texinfo tree for the node name on its own # # A contents array where the manual_content and node_content # elements substituted the initial contents is also returned, # typically to replace $LABEL_CONTENTS_CONTAINER->{'contents'} # for consistency. # # Could be documented, but only if there is evidence that this function # is useful in user-defined code. sub parse_node_manual($;$) { my $label_contents_container = shift; my $modify_node = shift; return (undef, undef) if (!$label_contents_container->{'contents'}); my $contents = $label_contents_container->{'contents'}; my $idx = 0; my $manual; my $result; my $node_content = []; if ($contents->[0] and $contents->[0]->{'text'} and $contents->[0]->{'text'} =~ /^\(/) { my ($new_first, $opening_brace); $manual = []; my $braces_count = 1; # Number of ( seen minus number of ) seen. # the leading ( from @$contents is not in manual. # If the first contents element is "(" followed by more text, split # the leading "(" into its own element. my $first = $contents->[0]; if ($first->{'text'} ne '(') { if ($modify_node) { $opening_brace = {'text' => '(', 'parent' => $label_contents_container}; } my $brace_text = $first->{'text'}; $brace_text =~ s/^\(//; $new_first = { 'text' => $brace_text}; } else { # first element is "(", it is not part of the manual, keep it $idx++; } for (; $idx < scalar(@$contents); $idx++) { my $content; if ($idx == 0) { $content = $new_first; } else { $content = $contents->[$idx]; } if (!defined($content->{'text'}) or $content->{'text'} !~ /\)/) { push @$manual, $content; $braces_count = _count_opened_tree_braces($content, $braces_count); # This is an error, braces were closed in a command if ($braces_count == 0) { last; } } else { my ($before, $after); ($before, $after, $braces_count) = _find_end_brace($content->{'text'}, $braces_count); if ($braces_count == 0) { my @remaining_source_marks; my $current_position = 0; # At this point, we are sure that there is a manual part, # so the pending removal/addition of elements at the beginning # of the manual can proceed (if modify_node). if ($modify_node) { if ($opening_brace) { # remove the original first element and prepend the # split "(" and text elements shift @$contents; $new_first ->{'parent'} = $label_contents_container; unshift @$contents, $new_first; unshift @$contents, $opening_brace; $idx++; if ($first->{'source_marks'}) { my $current_position = relocate_source_marks( $first->{'source_marks'}, $opening_brace, 0, length($opening_brace->{'text'})); relocate_source_marks($first->{'source_marks'}, $new_first, $current_position, length($new_first->{'text'})); } } # Remove current element $content with closing brace from the tree. splice(@$contents, $idx, 1); } # remove the closing ), it is not in manual_content $before =~ s/(\))$//; my $end_paren = $1; if ($before ne '') { # text before ), part of the manual name my $last_manual_element = { 'text' => $before }; push @$manual, $last_manual_element; if ($modify_node) { $last_manual_element->{'parent'} = $content->{'parent'}; splice(@$contents, $idx, 0, $last_manual_element); $idx++; $current_position = relocate_source_marks( $content->{'source_marks'}, $last_manual_element, $current_position, length($before)); } } if ($modify_node) { my $closing_brace = {'text' => ')', 'parent' => $content->{'parent'}}; splice(@$contents, $idx, 0, $closing_brace); $idx++; $current_position = relocate_source_marks( $content->{'source_marks'}, $closing_brace, $current_position, length($closing_brace->{'text'})); } $after =~ s/^(\s*)//; my $spaces_after = $1; if ($spaces_after and $modify_node) { my $spaces_element = {'text' => $spaces_after, 'parent' => $content->{'parent'}}; splice(@$contents, $idx, 0, $spaces_element); $idx++; $current_position = relocate_source_marks( $content->{'source_marks'}, $spaces_element, $current_position, length($spaces_after)); } if ($after ne '') { # text after ), part of the node name. my $leading_node_content = {'text' => $after}; push @$node_content, $leading_node_content; if ($modify_node) { $leading_node_content->{'parent'} = $content->{'parent'}; splice(@$contents, $idx, 0, $leading_node_content); $current_position = relocate_source_marks( $content->{'source_marks'}, $leading_node_content, $current_position, length($after)); } $idx++; } last; } else { push @$manual, $content; } } } if ($braces_count != 0) { # unclosed brace, reset $manual = undef; $idx = 0; } else { $result = {}; $result->{'manual_content'} = $manual; } } if ($idx < scalar(@$contents)) { push(@$node_content, @$contents[$idx .. scalar(@$contents)-1]); } if (scalar(@$node_content)) { $result = {} if (!$result); $result->{'node_content'} = $node_content; } return $result; } # misc functions used in diverse contexts and useful in converters sub element_associated_processing_encoding($) { my $element = shift; my $perl_encoding; my $encoding = $element->{'extra'}->{'input_encoding_name'} if ($element->{'extra'}); if (defined($encoding) and $encoding ne '') { $encoding = $encoding_name_conversion_map{$encoding} if (defined($encoding_name_conversion_map{$encoding})); my $Encode_encoding_object = Encode::find_encoding($encoding); if (defined($Encode_encoding_object)) { $perl_encoding = $Encode_encoding_object->name(); $perl_encoding = undef if (defined($perl_encoding) and $perl_encoding eq ''); } } return $perl_encoding; } # Reverse the decoding of the file name from the input encoding. When # dealing with file names, we want Perl strings representing sequences of # bytes, not Unicode codepoints. # This is necessary even if the name of the included file is purely # ASCII, as the name of the directory it is located within may contain # non-ASCII characters. # Otherwise, the -e operator and similar may not work correctly. sub encode_file_name($$) { my $file_name = shift; my $input_encoding = shift; my $encoding; return ($file_name, $encoding) if (not defined($input_encoding)); if ($input_encoding eq 'utf-8' or $input_encoding eq 'utf-8-strict') { $encoding = 'utf-8'; } else { $encoding = $input_encoding; } $file_name = Encode::encode($encoding, $file_name); return ($file_name, $encoding); } sub locate_include_file($$) { my $customization_information = shift; my $input_file_path = shift; my $ignore_include_directories = 0; my ($volume, $directories, $filename) = File::Spec->splitpath($input_file_path); my @directories = File::Spec->splitdir($directories); #print STDERR "$customization_information $input_file_path ". # @{$customization_information->get_conf('INCLUDE_DIRECTORIES')}\n"; # If the path is absolute or begins with . or .., do not search in # include directories. This is consistent with Kpathsea for Texinfo TeX. if (File::Spec->file_name_is_absolute($input_file_path)) { $ignore_include_directories = 1; } else { foreach my $dir (@directories) { if ($dir eq File::Spec->updir() or $dir eq File::Spec->curdir()) { $ignore_include_directories = 1; last; } elsif ($dir ne '') { last; } } } my $found_file; if ($ignore_include_directories) { $found_file = $input_file_path if (-e $input_file_path and -r $input_file_path); } else { my @include_directories; if ($customization_information and $customization_information->get_conf('INCLUDE_DIRECTORIES')) { @include_directories = @{$customization_information->get_conf('INCLUDE_DIRECTORIES')}; } else { # no object with directory list and not an absolute path, never succeed return undef; } foreach my $include_dir (@include_directories) { my ($include_volume, $include_dir_path, $include_filename) = File::Spec->splitpath($include_dir, 1); my $possible_file = File::Spec->catpath($include_volume, File::Spec->catdir(File::Spec->splitdir($include_dir_path), @directories), $filename); $found_file = $possible_file if (-e $possible_file and -r $possible_file); last if (defined($found_file)); } } return $found_file; } sub _informative_command_value($) { my $element = shift; my $cmdname = $element->{'cmdname'}; if ($Texinfo::Commands::line_commands{$cmdname} eq 'lineraw') { if (not $Texinfo::Commands::commands_args_number{$cmdname}) { return 1; } elsif ($element->{'args'}) { return join(' ', map {$_->{'text'}} @{$element->{'args'}}); } } elsif ($element->{'extra'} and exists($element->{'extra'}->{'text_arg'})) { return $element->{'extra'}->{'text_arg'}; } elsif ($element->{'extra'} and $element->{'extra'}->{'misc_args'} and exists($element->{'extra'}->{'misc_args'}->[0])) { return $element->{'extra'}->{'misc_args'}->[0]; } elsif ($Texinfo::Commands::line_commands{$cmdname} eq 'line' and $element->{'args'} and scalar(@{$element->{'args'}}) and $element->{'args'}->[0] and $element->{'args'}->[0]->{'contents'} and scalar(@{$element->{'args'}->[0]->{'contents'}}) and exists($element->{'args'}->[0]->{'contents'}->[0]->{'text'})) { return $element->{'args'}->[0]->{'contents'}->[0]->{'text'}; } return undef; } # REMARK documentencoding handling is not reverted by resetting a value with # set_conf, as the encodings are set using other sources of information # (possibly based on @documentencoding) in converter. sub set_informative_command_value($$) { my $self = shift; my $element = shift; my $cmdname = $element->{'cmdname'}; $cmdname = 'shortcontents' if ($cmdname eq 'summarycontents'); my $value = _informative_command_value($element); if (defined($value)) { return $self->set_conf($cmdname, $value); } return 0; } sub _in_preamble($) { my $element = shift; my $current_element = $element; while ($current_element->{'parent'}) { if (defined($current_element->{'parent'}->{'type'}) and $current_element->{'parent'}->{'type'} eq 'preamble_before_content') { return 1; } $current_element = $current_element->{'parent'}; } return 0; } # $COMMAND_LOCATION is 'last', 'preamble' or 'preamble_or_first' # 'preamble' means setting sequentially to the values in the preamble. # 'preamble_or_first' means setting to the first value for the command # in the document if the first command is not in the preamble, else set # sequentially to the values in the preamble. # 'last' means setting to the last value for the command in the document. # # For unique command, the last may be considered to be the same as the first. # # Notice that the only effect is to use set_conf (directly or through # set_informative_command_value), no @-commands setting side effects are done # and associated customization variables are not set/reset either. sub set_global_document_command($$$$) { my $self = shift; my $global_commands_information = shift; my $global_command = shift; my $command_location = shift; if ($command_location ne 'last' and $command_location ne 'preamble_or_first' and $command_location ne 'preamble') { warn "BUG: set_global_document_command: unknown command_location: $command_location"; } my $element; if ($global_commands_information and defined($global_commands_information->{$global_command}) and ref($global_commands_information->{$global_command}) eq 'ARRAY') { if ($command_location eq 'last') { $element = $global_commands_information->{$global_command}->[-1]; set_informative_command_value($self, $element); } else { if ($command_location eq 'preamble_or_first' and not _in_preamble($global_commands_information->{$global_command}->[0])) { $element = $global_commands_information->{$global_command}->[0]; set_informative_command_value($self, $element); } else { foreach my $command_element (@{$global_commands_information->{$global_command}}) { if (_in_preamble($command_element)) { $element = $command_element; set_informative_command_value($self, $element); } else { last; } } } } } elsif ($global_commands_information and defined($global_commands_information->{$global_command})) { # unique command, first, preamble and last are the same $element = $global_commands_information->{$global_command}; set_informative_command_value($self, $element); } return $element; } sub lookup_index_entry($$) { my $index_entry_info = shift; my $indices_information = shift; my ($entry_index_name, $entry_number) = @{$index_entry_info}; my $index_info; if ($indices_information->{$entry_index_name}) { $index_info = $indices_information->{$entry_index_name}; if ($index_info->{'index_entries'} and $index_info->{'index_entries'}->[$entry_number-1]) { return ($index_info->{'index_entries'}->[$entry_number-1], $index_info); } } return (undef, $index_info); } sub set_output_encodings($$) { my $customization_information = shift; my $parser_information = shift; $customization_information->set_conf('OUTPUT_ENCODING_NAME', $parser_information->{'input_encoding_name'}) if ($parser_information and $parser_information->{'input_encoding_name'}); if (not defined($customization_information->get_conf('OUTPUT_PERL_ENCODING')) and defined($customization_information->get_conf('OUTPUT_ENCODING_NAME'))) { my $conversion_encoding = $customization_information->get_conf('OUTPUT_ENCODING_NAME'); if (defined($encoding_name_conversion_map{$conversion_encoding})) { $conversion_encoding = $encoding_name_conversion_map{$conversion_encoding}; } my $perl_encoding = Encode::resolve_alias($conversion_encoding); if (defined($perl_encoding) and $perl_encoding ne '') { $customization_information->set_conf('OUTPUT_PERL_ENCODING', $perl_encoding); } } } my $min_level = $command_structuring_level{'chapter'}; my $max_level = $command_structuring_level{'subsubsection'}; # Return numbered level of an element, as modified by raise/lowersections sub section_level($) { my $section = shift; my $level = $command_structuring_level{$section->{'cmdname'}}; # correct level according to raise/lowersections if ($section->{'extra'} and $section->{'extra'}->{'sections_level'}) { $level -= $section->{'extra'}->{'sections_level'}; if ($level < $min_level) { if ($command_structuring_level{$section->{'cmdname'}} < $min_level) { $level = $command_structuring_level{$section->{'cmdname'}}; } else { $level = $min_level; } } elsif ($level > $max_level) { $level = $max_level; } } return $level; } sub trim_spaces_comment_from_content($) { my $contents = shift; shift @$contents if ($contents->[0] and $contents->[0]->{'type'} and ($contents->[0]->{'type'} eq 'ignorable_spaces_after_command' or $contents->[0]->{'type'} eq 'spaces_after_close_brace')); while (@$contents and (($contents->[-1]->{'cmdname'} and ($contents->[-1]->{'cmdname'} eq 'c' or $contents->[-1]->{'cmdname'} eq 'comment')) or ($contents->[-1]->{'type'} and $contents->[-1]->{'type'} eq 'spaces_at_end'))) { pop @$contents; } } # decompose a decimal number on a given base. sub _decompose_integer($$) { my $number = shift; my $base = shift; my @result = (); while ($number >= 0) { my $factor = $number % $base; push (@result, $factor); $number = int(($number - $factor) / $base) - 1; } return @result; } sub enumerate_item_representation($$) { my $specification = shift; my $number = shift; if ($specification =~ /^[0-9]+$/) { return $specification + $number -1; } my $result = ''; my $base_letter = ord('a'); $base_letter = ord('A') if (ucfirst($specification) eq $specification); my @letter_ords = _decompose_integer(ord($specification) - $base_letter + $number - 1, 26); foreach my $ord (@letter_ords) { $result = chr($base_letter + $ord) . $result; } return $result; } sub is_content_empty($;$); sub is_content_empty($;$) { my $tree = shift; my $do_not_ignore_index_entries = shift; if (!defined($tree) or !exists($tree->{'contents'})) { return 1; } foreach my $content (@{$tree->{'contents'}}) { if ($content->{'cmdname'}) { if ($content->{'type'} and $content->{'type'} eq 'index_entry_command') { if ($do_not_ignore_index_entries) { return 0; } else { next; } } if (exists($Texinfo::Commands::line_commands{$content->{'cmdname'}})) { if ($Texinfo::Commands::formatted_line_commands{$content->{'cmdname'}} or $Texinfo::Commands::formattable_line_commands{$content->{'cmdname'}}) { return 0; } else { next; } } elsif (exists($Texinfo::Commands::nobrace_commands{$content->{'cmdname'}})) { if ($Texinfo::Commands::formatted_nobrace_commands{$content->{'cmdname'}}) { return 0; } else { next; } } elsif ($non_formatted_brace_commands{$content->{'cmdname'}} or $Texinfo::Commands::non_formatted_block_commands{$content->{'cmdname'}}) { next; } else { return 0; } } if ($content->{'type'}) { if ($content->{'type'} eq 'paragraph') { return 0; } } if ($content->{'text'} and $content->{'text'} =~ /\S/) { return 0; } if (not is_content_empty($content, $do_not_ignore_index_entries)) { return 0; } } return 1; } # if in this container, we are 'inline', within a running text my @inline_types = ('def_line', 'paragraph', 'preformatted', 'line_arg', 'block_line_arg', 'menu_entry_name', 'menu_entry_node'); my %inline_types; foreach my $type (@inline_types) { $inline_types{$type} = 1; } my %not_inline_commands; foreach my $command ( keys(%Texinfo::Commands::root_commands), keys(%Texinfo::Commands::block_commands), grep {$Texinfo::Commands::brace_commands{$_} eq 'context'} keys(%Texinfo::Commands::brace_commands)) { $not_inline_commands{$command} = 1; } # Return 1 if inline in a running text, 0 if right in top-level or block # environment, and undef otherwise. sub _inline_or_block($) { my $current = shift; if ($current->{'type'} and $inline_types{$current->{'type'}}) { return 1; } elsif ($current->{'cmdname'} and exists($not_inline_commands{$current->{'cmdname'}})) { return 0; } else { return undef; } } # return true if in running text context. # If $CHECK_CURRENT is set, check the element itself, too, in # addition to the parent context. sub element_is_inline($;$) { my $current = shift; my $check_current = shift; if ($check_current) { my $inline_or_block = _inline_or_block($current); return ($inline_or_block) if (defined($inline_or_block)); } while ($current->{'parent'}) { $current = $current->{'parent'}; my $inline_or_block = _inline_or_block($current); return ($inline_or_block) if (defined($inline_or_block)); } return 0; } sub normalize_top_node_name($) { my $node_name = shift; if ($node_name =~ /^top$/i) { return 'Top'; } return $node_name; } # Used in count_bytes my $Encode_encoding_object; my $last_encoding; sub count_bytes($$;$) { my $self = shift; my $string = shift; my $encoding = shift; if (!defined($encoding) and $self and $self->get_conf('OUTPUT_PERL_ENCODING')) { $encoding = $self->get_conf('OUTPUT_PERL_ENCODING'); } if ($encoding and $encoding ne 'ascii') { if (!defined($last_encoding) or $last_encoding ne $encoding) { # Look up and save encoding object for next time. This is # slightly faster than calling Encode::encode. $last_encoding = $encoding; $Encode_encoding_object = Encode::find_encoding($encoding); if (!defined($Encode_encoding_object)) { Carp::croak "Unknown encoding '$encoding'"; } } return length($Encode_encoding_object->encode($string)); } else { return length($string); } } # if $PREFER_REFERENCE_ELEMENT is set, prefer an untranslated element. sub index_content_element($;$) { my $element = shift; my $prefer_reference_element = shift; if ($element->{'extra'} and $element->{'extra'}->{'def_command'}) { if ($prefer_reference_element and $element->{'extra'}->{'def_index_ref_element'}) { return $element->{'extra'}->{'def_index_ref_element'}; } else { return $element->{'extra'}->{'def_index_element'}; } } else { return $element->{'args'}->[0]; } } # custom heading command line is split at @| sub split_custom_heading_command_contents($) { my $contents = shift; my $result = []; my $nr_split_contents = 0; my @contents = @$contents; trim_spaces_comment_from_content(\@contents); if (scalar(@contents) == 0) { # or undef? return $result; } push @$result, []; while (scalar(@contents)) { my $current_content = $contents[0]; #print STDERR "$nr_split_contents ".scalar(@contents).": " # .debug_print_element($current_content)."\n"; if (defined($current_content->{'cmdname'}) and $current_content->{'cmdname'} eq '|') { shift @contents; push @$result, []; $nr_split_contents++; if ($nr_split_contents >= 2) { last; } } else { push @{$result->[-1]}, shift @contents; } } push @{$result->[-1]}, @contents; return $result; } # not currently used sub find_parent_root_command($$); sub find_parent_root_command($$) { my $self = shift; my $current = shift; my $root_command; while (1) { if ($current->{'cmdname'}) { if ($Texinfo::Commands::root_commands{$current->{'cmdname'}}) { return $current; } elsif ($Texinfo::Commands::block_commands{$current->{'cmdname'}} and $Texinfo::Commands::block_commands{$current->{'cmdname'}} eq 'region') { if ($current->{'cmdname'} eq 'copying' and $self and $self->{'global_commands'} and $self->{'global_commands'}->{'insertcopying'}) { foreach my $insertcopying(@{$self->{'global_commands'}->{'insertcopying'}}) { my $root_command = find_parent_root_command($self, $insertcopying); return $root_command if (defined($root_command)); } } else { return undef; } } } if ($current->{'parent'}) { $current = $current->{'parent'}; } else { return undef; } } # Should never get there return undef; } # In the default case, global informative commands are collected # by the parsers. The following functions allow to collect # any @-command. # Used in customization init files code and should be useful in # particular in user-defined init files. sub collect_commands_in_tree($$) { my $root = shift; my $commands_list = shift; my $commands_hash = {}; foreach my $command_name (@$commands_list) { $commands_hash->{$command_name} = []; } _collect_commands_in_tree($root, $commands_hash); return $commands_hash; } sub _collect_commands_in_tree($$); sub _collect_commands_in_tree($$) { my $current = shift; my $commands_hash = shift; if (defined($current->{'cmdname'}) and defined($commands_hash->{$current->{'cmdname'}})) { push @{$commands_hash->{$current->{'cmdname'}}}, $current; } foreach my $key ('args', 'contents') { if ($current->{$key}) { foreach my $child (@{$current->{$key}}) { _collect_commands_in_tree($child, $commands_hash); } } } } sub collect_commands_list_in_tree($$) { my $root = shift; my $commands_list = shift; my $collected_commands_list = []; my $commands_hash = {}; foreach my $command_name (@$commands_list) { $commands_hash->{$command_name} = 1; } _collect_commands_list_in_tree($root, $commands_hash, $collected_commands_list); return $collected_commands_list; } sub _collect_commands_list_in_tree($$$); sub _collect_commands_list_in_tree($$$) { my $current = shift; my $commands_hash = shift; my $collected_commands_list = shift; if (defined($current->{'cmdname'}) and defined($commands_hash->{$current->{'cmdname'}})) { push @{$collected_commands_list}, $current; } foreach my $key ('args', 'contents') { if ($current->{$key}) { foreach my $child (@{$current->{$key}}) { _collect_commands_list_in_tree($child, $commands_hash, $collected_commands_list); } } } } # functions useful for Texinfo tree transformations # and some tree transformations functions, mostly those # used in conversion to main output formats. In general, # tree transformations functions are documented in the POD section. # Some helper functions defined here are used in other # modules but are not generally useful in converters # and therefore not public. sub _copy_tree($$$); sub _copy_tree($$$) { my $current = shift; my $parent = shift; my $reference_associations = shift; if (exists($reference_associations->{$current})) { # happens for def_*index_element (which are not in the main tree) # as they contain pieces of the main tree #print STDERR "COPY: replace $current: " # ."$reference_associations->{$current}; " # .Texinfo::Common::debug_print_element($current)."\n"; return $reference_associations->{$current}; } my $new = {}; $reference_associations->{$current} = $new; $new->{'parent'} = $parent if ($parent); foreach my $key ('type', 'cmdname', 'text') { $new->{$key} = $current->{$key} if (exists($current->{$key})); } foreach my $key ('args', 'contents') { if ($current->{$key}) { if (ref($current->{$key}) ne 'ARRAY') { my $command_or_type = ''; if ($new->{'cmdname'}) { $command_or_type = '@'.$new->{'cmdname'}; } elsif ($new->{'type'}) { $command_or_type = $new->{'type'}; } print STDERR "BUG: Not an array [$command_or_type] $key ". ref($current->{$key})."\n"; } $new->{$key} = []; foreach my $child (@{$current->{$key}}) { push @{$new->{$key}}, _copy_tree($child, $new, $reference_associations); } } } return $new; } # this code works with arrays mixing scalars and reference to elements. # In practice arrays in extra are either only scalars (index_entry, # misc_args) or only elements (the remaining). sub _substitute_references_in_array($$$;$); sub _substitute_references_in_array($$$;$) { my $array = shift; my $reference_associations = shift; my $context = shift; my $level = shift; $level = 0 if (!defined($level)); $level++; my $result = []; my $index = 0; foreach my $item (@{$array}) { if (ref($item) eq '') { push @{$result}, $item; } elsif ($reference_associations->{$item}) { push @{$result}, $reference_associations->{$item}; } elsif (ref($item) eq 'ARRAY') { # nothing like a two level array currently, and hopefully never push @$result, _substitute_references_in_array($item, $reference_associations, "$context [$index]", $level); } else { print STDERR "Trouble with $context [$index] (".ref($item).")\n"; push @{$result}, undef; } $index++; } return $result; } sub _copy_extra_info($$$;$); sub _copy_extra_info($$$;$) { my $current = shift; my $new = shift; my $reference_associations = shift; my $level = shift; my $command_or_type = ''; if ($new->{'cmdname'}) { $command_or_type = '@'.$new->{'cmdname'}; } elsif ($new->{'type'}) { $command_or_type = $new->{'type'}; } $level = 0 if (!defined($level)); $level++; #print STDERR (' ' x $level) # .Texinfo::Common::debug_print_element($current).": $current\n"; foreach my $key ('args', 'contents') { if ($current->{$key}) { my $index = 0; foreach my $child (@{$current->{$key}}) { _copy_extra_info($child, $new->{$key}->[$index], $reference_associations, $level); $index++; } } } foreach my $info_type ('info', 'extra') { next if (!$current->{$info_type}); foreach my $key (keys %{$current->{$info_type}}) { #print STDERR (' ' x $level) . "K $info_type $key\n"; my $value = $current->{$info_type}->{$key}; if (ref($value) eq '') { $new->{$info_type}->{$key} = $value; } elsif (ref($value) eq 'ARRAY') { # authors index_entry manual_content menus misc_args node_content #print STDERR "Array $command_or_type $info_type -> $key\n"; $new->{$info_type}->{$key} = _substitute_references_in_array($value, $reference_associations, "${info_type}[$command_or_type]{$key}", $level); } elsif (ref($value) eq 'HASH') { if ($reference_associations->{$value}) { # reference to another element in the tree, for example: # associated_node caption columnfractions def_index_element # command_as_argument seealso subentry $new->{$info_type}->{$key} = $reference_associations->{$value}; #print STDERR "Done $info_type [$command_or_type]: $key\n"; } elsif ($value->{'contents'} or $value->{'args'} or $value->{'cmdname'} or $value->{'type'} or (defined($value->{'text'}) and $value->{'text'} ne '')) { # Out of tree element. # Note that the code works only if the out of tree elements are # not referred to by main tree elements. #print STDERR "Doing $info_type [$command_or_type]: $key\n"; my $new_element = _copy_tree($value, undef, $reference_associations); _copy_extra_info($value, $new_element, $reference_associations, $level); $new->{$info_type}->{$key} = $new_element; } else { # code that could handle hash structures that would not be elements. # Not used currently, and it would be better if it stayed that way. #print STDERR "HASH $info_type $key\n"; $new->{$info_type}->{$key} = {}; foreach my $type_key (keys(%{$value})) { if (ref($value->{$type_key}) eq '') { $new->{$info_type}->{$key}->{$type_key} = $value->{$type_key}; } elsif ($reference_associations->{$value->{$type_key}}) { $new->{$info_type}->{$key}->{$type_key} = $reference_associations->{$value->{$type_key}}; } elsif (ref($value->{$type_key}) eq 'ARRAY') { $new->{$info_type}->{$key}->{$type_key} = _substitute_references_in_array($value->{$type_key}, $reference_associations, "${info_type}[$command_or_type]{$key}{$type_key}", $level); } else { print STDERR "Unexpected $info_type [$command_or_type]{$key}: $type_key\n"; } } } } } } } sub copy_tree($;$) { my $current = shift; my $parent = shift; my $reference_associations = {}; my $copy = _copy_tree($current, $parent, $reference_associations); _copy_extra_info($current, $copy, $reference_associations); return $copy; } sub copy_contents($) { my $contents = shift; if (ref($contents) ne 'ARRAY') { cluck "$contents not an array"; return undef; } my $copy = copy_tree({'contents' => $contents}); return $copy->{'contents'}; } sub modify_tree($$;$); sub modify_tree($$;$) { my $tree = shift; my $operation = shift; my $argument = shift; #print STDERR "modify_tree tree: $tree\n"; if (!defined($tree) or ref($tree) ne 'HASH') { cluck "tree ".(!defined($tree) ? 'UNDEF' : "not a hash: $tree"); return undef; } if ($tree->{'args'}) { my @args = @{$tree->{'args'}}; for (my $i = 0; $i <= $#args; $i++) { my @new_args = &$operation('arg', $args[$i], $argument); modify_tree($args[$i], $operation, $argument); # this puts the new args at the place of the old arg using the # offset from the end of the array splice (@{$tree->{'args'}}, $i - $#args -1, 1, @new_args); } } if ($tree->{'contents'}) { my @contents = @{$tree->{'contents'}}; for (my $i = 0; $i <= $#contents; $i++) { my @new_contents = &$operation('content', $contents[$i], $argument); modify_tree($contents[$i], $operation, $argument); # this puts the new contents at the place of the old content using the # offset from the end of the array splice (@{$tree->{'contents'}}, $i - $#contents -1, 1, @new_contents); } } if ($tree->{'source_marks'}) { my @source_marks = @{$tree->{'source_marks'}}; for (my $i = 0; $i <= $#source_marks; $i++) { if ($source_marks[$i]->{'element'}) { my @new_element = &$operation('source_mark', $source_marks[$i]->{'element'}, $argument); $source_marks[$i]->{'element'} = $new_element[0]; } } } return $tree; } sub _protect_comma($$) { my $type = shift; my $current = shift; return _protect_text($current, quotemeta(',')); } sub protect_comma_in_tree($) { my $tree = shift; return modify_tree($tree, \&_protect_comma); } sub _new_asis_command_with_text($$;$) { my $text = shift; my $parent = shift; my $text_type = shift; my $new_command = {'cmdname' => 'asis', 'parent' => $parent }; push @{$new_command->{'args'}}, {'type' => 'brace_command_arg', 'parent' => $new_command}; push @{$new_command->{'args'}->[0]->{'contents'}}, { 'text' => $text, 'parent' => $new_command->{'args'}->[0]}; if (defined($text_type)) { $new_command->{'args'}->[0]->{'contents'}->[0]->{'type'} = $text_type; } return $new_command; } sub _protect_text($$) { my $current = shift; my $to_protect = shift; #print STDERR "_protect_text: $to_protect: $current " # .debug_print_element($current, 1)."\n"; if (defined($current->{'text'}) and $current->{'text'} =~ /$to_protect/ and !(defined($current->{'type'}) and $current->{'type'} eq 'raw')) { my @result = (); my $remaining_text = $current->{'text'}; while ($remaining_text) { if ($remaining_text =~ s/^(.*?)(($to_protect)+)//) { if ($1 ne '') { push @result, {'text' => $1, 'parent' => $current->{'parent'}}; $result[-1]->{'type'} = $current->{'type'} if defined($current->{'type'}); } if ($to_protect eq quotemeta(',')) { for (my $i = 0; $i < length($2); $i++) { push @result, {'cmdname' => 'comma', 'parent' => $current->{'parent'}, 'args' => [{'type' => 'brace_command_arg'}]}; } } else { push @result, _new_asis_command_with_text($2, $current->{'parent'}, $current->{'type'}); } } else { push @result, {'text' => $remaining_text, 'parent' => $current->{'parent'}}; $result[-1]->{'type'} = $current->{'type'} if defined($current->{'type'}); last; } } #print STDERR "_protect_text: Result: @result\n"; return @result; } else { #print STDERR "_protect_text: No change\n"; return ($current); } } sub _protect_colon($$) { my $type = shift; my $current = shift; return _protect_text($current, quotemeta(':')); } sub protect_colon_in_tree($) { my $tree = shift; return modify_tree($tree, \&_protect_colon); } sub _protect_node_after_label($$) { my $type = shift; my $current = shift; return _protect_text($current, '['. quotemeta(".\t,") .']'); } sub protect_node_after_label_in_tree($) { my $tree = shift; return modify_tree($tree, \&_protect_node_after_label); } sub protect_first_parenthesis($) { my $contents = shift; confess("BUG: protect_first_parenthesis contents undef") if (!defined($contents)); #print STDERR "protect_first_parenthesis: $contents\n"; my @contents = @$contents; my $brace; if ($contents[0] and $contents->[0]{'text'} and $contents[0]->{'text'} =~ /^\(/) { if ($contents[0]->{'text'} !~ /^\($/) { $brace = shift @contents; my $brace_text = $brace->{'text'}; $brace_text =~ s/^\(//; unshift @contents, { 'text' => $brace_text, 'type' => $brace->{'type'}, 'parent' => $brace->{'parent'} } if $brace_text ne ''; } else { $brace = shift @contents; } unshift @contents, _new_asis_command_with_text('(', $brace->{'parent'}, $brace->{'type'}); } return \@contents; } sub move_index_entries_after_items($) { # enumerate or itemize my $current = shift; return unless ($current->{'contents'}); my $previous; foreach my $item (@{$current->{'contents'}}) { #print STDERR "Before proceeding: $previous $item->{'cmdname'} (@{$previous->{'contents'}})\n" if ($previous and $previous->{'contents'}); if (defined($previous) and $item->{'cmdname'} and $item->{'cmdname'} eq 'item' and $previous->{'contents'} and scalar(@{$previous->{'contents'}})) { my $previous_ending_container; if ($previous->{'contents'}->[-1]->{'type'} and ($previous->{'contents'}->[-1]->{'type'} eq 'paragraph' or $previous->{'contents'}->[-1]->{'type'} eq 'preformatted')) { $previous_ending_container = $previous->{'contents'}->[-1]; } else { $previous_ending_container = $previous; } my @gathered_index_entries; #print STDERR "Gathering for item $item in previous $previous ($previous_ending_container)\n"; while ($previous_ending_container->{'contents'}->[-1] and (($previous_ending_container->{'contents'}->[-1]->{'type'} and $previous_ending_container->{'contents'}->[-1]->{'type'} eq 'index_entry_command') or ($previous_ending_container->{'contents'}->[-1]->{'cmdname'} and ($previous_ending_container->{'contents'}->[-1]->{'cmdname'} eq 'c' or $previous_ending_container->{'contents'}->[-1]->{'cmdname'} eq 'comment')))) { unshift @gathered_index_entries, pop @{$previous_ending_container->{'contents'}}; } #print STDERR "Gathered: @gathered_index_entries\n"; if (scalar(@gathered_index_entries)) { # put back leading comments while ($gathered_index_entries[0] and (!$gathered_index_entries[0]->{'type'} or $gathered_index_entries[0]->{'type'} ne 'index_entry_command')) { #print STDERR "Putting back $gathered_index_entries[0] $gathered_index_entries[0]->{'cmdname'}\n"; push @{$previous_ending_container->{'contents'}}, shift @gathered_index_entries; } # We have the index entries of the previous @item or before item. # Now put them right after the current @item command. if (scalar(@gathered_index_entries)) { my $item_container; if ($item->{'contents'} and $item->{'contents'}->[0] and $item->{'contents'}->[0]->{'type'} and $item->{'contents'}->[0]->{'type'} eq 'preformatted') { $item_container = $item->{'contents'}->[0]; } else { $item_container = $item; } foreach my $entry(@gathered_index_entries) { $entry->{'parent'} = $item_container; } if ($item_container->{'contents'} and $item_container->{'contents'}->[0] and $item_container->{'contents'}->[0]->{'type'} and $item_container->{'contents'}->[0]->{'type'} eq 'ignorable_spaces_after_command') { $item_container->{'contents'}->[0]->{'text'} .= "\n" if ($item_container->{'contents'}->[0]->{'text'} !~ /\n$/); unshift @gathered_index_entries, shift @{$item_container->{'contents'}}; } unshift @{$item_container->{'contents'}}, @gathered_index_entries; } } } $previous = $item; } } sub _move_index_entries_after_items($$) { my $type = shift; my $current = shift; if ($current->{'cmdname'} and ($current->{'cmdname'} eq 'enumerate' or $current->{'cmdname'} eq 'itemize')) { move_index_entries_after_items($current); } return ($current); } # For @itemize/@enumerate sub move_index_entries_after_items_in_tree($) { my $tree = shift; return modify_tree($tree, \&_move_index_entries_after_items); } sub _relate_index_entries_to_table_items_in($$) { my $table = shift; my $indices_information = shift; return unless $table->{'contents'}; foreach my $table_entry (@{$table->{'contents'}}) { next unless $table_entry->{'contents'} and $table_entry->{'type'} eq 'table_entry'; my $term = $table_entry->{'contents'}->[0]; my $definition; my $item; my @moved_index_entries; # Move any index entries from the start of a 'table_definition' to # the 'table_term'. if (defined($table_entry->{'contents'}->[1]) and defined($table_entry->{'contents'}->[1]->{'type'}) and $table_entry->{'contents'}->[1]->{'type'} eq 'table_definition') { $definition = $table_entry->{'contents'}->[1]; while (defined($definition->{'contents'}->[0])) { my $child = $definition->{'contents'}->[0]; last if !defined($child->{'type'}) or $child->{'type'} ne 'index_entry_command'; shift @{$definition->{'contents'}}; push @moved_index_entries, $child; $child->{'parent'} = $term; } unshift @{$term->{'contents'}}, @moved_index_entries; } if (defined($term->{'type'}) and $term->{'type'} eq 'table_term') { # Relate the first index_entry_command in the 'table_term' to # the term itself. my $index_entry; foreach my $content (@{$term->{'contents'}}) { if ($content->{'type'} and $content->{'type'} eq 'index_entry_command') { my $index_info; ($index_entry, $index_info) = Texinfo::Common::lookup_index_entry( $content->{'extra'}->{'index_entry'}, $indices_information) unless $index_entry; } elsif ($content->{'cmdname'} and $content->{'cmdname'} eq 'item') { $item = $content unless $item; } if ($item and $index_entry) { # This is better than overwriting 'entry_element', which # holds important information. $index_entry->{'entry_associated_element'} = $item; last; } } } } } # Locate all @tables in the tree, and relate index entries to # the @item that immediately follows or precedes them. sub _relate_index_entries_to_table_items($$$) { my $type = shift; my $current = shift; my $indices_information = shift; return $current unless $current->{'cmdname'}; if ($current->{'cmdname'} eq 'table') { _relate_index_entries_to_table_items_in($current, $indices_information); } return $current; } sub relate_index_entries_to_table_items_in_tree($$) { my $tree = shift; my $indices_information = shift; return modify_tree($tree, \&_relate_index_entries_to_table_items, $indices_information); } # Common to different module, but not meant to be used in user-defined # codes. # Register a target element associated to a label that may be the target of # a reference and must be unique in the document. Corresponds to @node, # @anchor, and @float (float label corresponds to the second argument). # # $TARGET_ELEMENTS_LIST array reference for elements associated to a label # $TARGET_ELEMENT is the tree element associated to the label. sub register_label($$) { my ($target_elements_list, $target_element) = @_; # register the element in the list. push @{$target_elements_list}, $target_element; } sub get_label_element($) { my $current = shift; if (($current->{'cmdname'} eq 'node' or $current->{'cmdname'} eq 'anchor') and $current->{'args'} and scalar(@{$current->{'args'}})) { return $current->{'args'}->[0] } elsif ($current->{'cmdname'} eq 'float' and $current->{'args'} and scalar(@{$current->{'args'}}) >= 2) { return $current->{'args'}->[1]; } return undef; } # functions used for debugging. May be used in other modules. # Not documented. sub _parent_string($) { my $current = shift; my $parent_string; if ($current->{'parent'}) { my $parent = $current->{'parent'}; my $parent_cmd = ''; my $parent_type = ''; $parent_cmd = "\@$parent->{'cmdname'}" if (defined($parent->{'cmdname'})); $parent_type = "($parent->{'type'})" if (defined($parent->{'type'})); $parent_string = " <- $parent_cmd$parent_type"; } return $parent_string } sub debug_command_name($) { my $cmdname = shift; if ($cmdname eq "\n") { return '\n'; } elsif ($cmdname eq "\t") { return '\t'; } else { return $cmdname; } } # informations on a tree element, short version sub debug_print_element($;$) { my $current = shift; my $print_parent = shift; if (!defined($current)) { return "debug_print_element: UNDEF\n"; } if (ref($current) ne 'HASH') { return "debug_print_element: $current not a hash\n"; } my $type = ''; my $cmd = ''; my $text = ''; $type = "($current->{'type'})" if (defined($current->{'type'})); # specific of HTML $type .= '{'.$current->{'extra'}->{'special_element_type'}.'}' if (defined($current->{'extra'}) and defined($current->{'extra'}->{'special_element_type'})); if (defined($current->{'cmdname'})) { $cmd = '@' . debug_command_name($current->{'cmdname'}); } if (defined($current->{'text'}) and $current->{'text'} ne '') { my $text_str = $current->{'text'}; $text_str =~ s/\n/\\n/g; $text = "[T: $text_str]"; } my $args = ''; my $contents = ''; $args = "[A".scalar(@{$current->{'args'}}).']' if $current->{'args'}; $contents = "[C".scalar(@{$current->{'contents'}}).']' if $current->{'contents'}; my $parent_string = ''; if ($print_parent) { $parent_string = _parent_string($current); $parent_string = '' if (!defined($parent_string)); } return "$cmd$type$text$args$contents$parent_string"; } # for debugging sub debug_print_element_details($;$) { my $current = shift; my $print_parent = shift; my $string = debug_print_element($current, $print_parent); foreach my $key (keys (%$current)) { $string .= " $key: $current->{$key}\n"; } if ($current->{'extra'}) { $string .= " EXTRA\n"; foreach my $key (keys (%{$current->{'extra'}})) { $string .= " $key: $current->{'extra'}->{$key}\n"; } } return $string; } # format list for debugging messages sub debug_list { my $label = shift; my (@list) = (ref $_[0] && $_[0] =~ /.*ARRAY.*/) ? @{$_[0]} : @_; my $str = "$label: ["; my @items = (); for my $item (@list) { $item = "" if ! defined ($item); $item =~ s/\n/\\n/g; push (@items, $item); } $str .= join (" ", @items); $str .= "]"; warn "$str\n"; } # format hash for debugging messages sub debug_hash { my ($label) = shift; my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_; my $str = "$label: {"; my @items = (); for my $key (sort keys %hash) { my $val = $hash{$key} || ""; # no undef $key =~ s/\n/\\n/g; $val =~ s/\n/\\n/g; push (@items, "$key:$val"); } $str .= join (",", @items); $str .= "}"; warn "$str\n"; } use Data::Dumper; my @kept_keys = ('contents', 'cmdname', 'type', 'text', 'args', 'extra', 'info', 'def_role', 'spaces_before_argument', 'spaces_after_argument', 'comment_at_end', 'index_entry' ); my %kept_keys; foreach my $key (@kept_keys) { $kept_keys{$key} = 1; } sub _filter_print_keys { [grep {$kept_keys{$_}} ( sort keys %{$_[0]} )] }; sub debug_print_tree($) { my $tree = shift; local $Data::Dumper::Sortkeys = \&_filter_print_keys; local $Data::Dumper::Purity = 1; local $Data::Dumper::Indent = 1; return Data::Dumper->Dump([$tree]); } 1; __END__ =head1 NAME Texinfo::Common - Texinfo modules common data and miscellaneous methods =head1 SYNOPSIS use Texinfo::Common; my @commands_to_collect = ('math'); my $collected_commands = Texinfo::Common::collect_commands_in_tree($document_root, \@commands_to_collect); =head1 NOTES The Texinfo Perl module main purpose is to be used in C to convert Texinfo to other formats. There is no promise of API stability. =head1 DESCRIPTION Texinfo::Common holds hashes with miscellaneous information and some hashes with information on Texinfo @-commands, as well as miscellaneous methods. =head1 MISC INFORMATION Hashes are defined as C variables, and are therefore available outside of the module. TODO: undocumented %null_device_file %default_parser_customization_values %document_settable_multiple_at_commands %document_settable_unique_at_commands %default_converter_command_line_options %default_main_program_customization_options %default_converter_customization @variable_string_settables %document_settable_at_commands %def_map %command_structuring_level %level_to_structuring_command %encoding_name_conversion_map =over =item %texinfo_output_formats X> Cannonical output formats that have associated conditionals. In practice corresponds to C C<%block_commands> plus C and C. =back =head1 @-COMMAND INFORMATION Hashes are defined as C<our> variables, and are therefore available outside of the module. The key of the hashes are @-command names without the @. The following hashes are available: =over =item %all_commands X<C<%all_commands>> All the @-commands. =item %def_aliases =item %def_no_var_arg_commands X<C<%def_aliases>> X<C<%def_no_var_arg_commands>> C<%def_aliases> associates an aliased command to the original command, for example C<defun> is associated to C<deffn>. C<%def_no_var_arg_commands> associates a definition command name with a true value if the I<argument> on the definition command line can contain non-metasyntactic variables. For instance, it is true for C<deftypevr> but false for C<defun>, since C<@defun> I<argument> is supposed to contain metasyntactic variables only. =item %nobrace_symbol_text X<C<%nobrace_symbol_text>> Values are ASCII representation of single character non-alphabetical commands without brace such as C<*> or C<:>. The value may be an empty string. =item %non_formatted_brace_commands Brace commands that are not immediately replaced with text, such as C<anchor>, C<caption>, C<errormsg> and others. =item %small_block_associated_command X<C<%small_block_associated_command>> Associate small command like C<smallexample> to the regular command C<example>. =back =head1 METHODS Two methods are exported in the default case for Texinfo modules messages translation in the Uniforum gettext framework, C<__> and C<__p>. The Texinfo tree and Texinfo tree elements used in argument of some functions are documented in L<Texinfo::Parser/TEXINFO TREE>. When customization information is needed, an object that defines C<set_conf> and/or C<get_conf> is expected, for example a converter inheriting from C<Texinfo::Convert::Converter>, see L<Texinfo::Convert::Converter/Getting and setting customization variables>. =over =item $translated_string = __($msgid) =item $translated_string = __p($msgctxt, $msgid) Returns the I<$msgid> string translated in the Texinfo messages text domain. C<__p> can be used instead of C<__> to pass a I<$msgctxt> context string to provide translators with information on the string context when the string is short or if the translation could depend on the context. C<__> corresponds to the C<gettext> function and C<__p> to the C<pgettext> function. It is not advised to use those functions in user-defined code. It is not practical either, as the translatable strings marked by C<__> or C<__p> need to be collected and added to the Texinfo messages domain. This facility could only be used in user-defined code with translatable strings already present in the domain anyway. In fact, these functions are documented mainly because they are automatically exported. See L<libintl-perl>, L<C<gettext> C interface|https://www.gnu.org/software/gettext/manual/html_node/gettext.html>, L<Perl in GNU Gettext|https://www.gnu.org/software/gettext/manual/html_node/Perl.html>. For translation of strings in output, see L<Texinfo::Translations>. =item collect_commands_in_tree($tree, $commands_list) X<C<collect_commands_in_tree>> Returns a hash reference with keys @-commands names specified in the I<$commands_list> array reference and values arrays of tree elements corresponding to those @-command found in I<$tree> by traversing the tree. =item collect_commands_list_in_tree($tree, $commands_list) X<C<collect_commands_list_in_tree>> Return a list reference containing the tree elements corresponding to the @-commands names specified in the I<$commands_list> found in I<$tree> by traversing the tree. The order of the @-commands should be kept. =item $encoding_name = element_associated_processing_encoding($element) X<C<element_associated_processing_encoding>> Returns the encoding name that can be used for decoding derived from the encoding that was set where I<$element> appeared. =item $result = element_is_inline($element, $check_current) X<C<element_is_inline>> Return true if the element passed in argument is in running text context. If the optional I<$check_current> argument is set, check the element itself, in addition to the parent context. =item ($encoded_file_name, $encoding) = encode_file_name($file_name, $input_encoding) Encode the I<$file_name> text string to a binary string I<$encoded_file_name> based on I<$input_encoding>. Also returns the I<$encoding> name actually used which may have undergone some normalization. This function is mostly a wrapper around L<Encode::encode|Encode/encode> which avoids calling the module if not needed. Do nothing if I<$input_encoding> is C<undef>. =item $text = enumerate_item_representation($specification, $number) X<C<enumerate_item_representation>> This function returns the number or letter correponding to item number I<$number> for an C<@enumerate> specification I<$specification>, appearing on an C<@enumerate> line. For example enumerate_item_representation('c', 3) is C<e>. =item $command = find_parent_root_command($object, $tree_element) X<C<find_parent_root_command>> Find the parent root command (sectioning command or node) of a tree element. The I<$object> argument is optional, its C<global_commands> field is used to continue through C<@insertcopying> if in a C<@copying>. =item $entry_content_element = index_content_element($element, $prefer_reference_element) Return a Texinfo tree element corresponding to the content of the index entry associated to I<$element>. If I<$prefer_reference_element> is set, prefer an untranslated element. If the element is an index command like C<@cindex> or an C<@ftable> C<@item>, the content element is the argument of the command. If the element is a definition line, the index entry element is based on the name and class. =item $result = is_content_empty($tree, $do_not_ignore_index_entries) X<C<is_content_empty>> Return true if the I<$tree> has content that could be formatted. I<$do_not_ignore_index_entries> is optional. If set, index entries are considered to be formatted. =item $file = locate_include_file($customization_information, file_path) X<C<locate_include_file>> Locate I<$file_path>. If I<$file_path> is an absolute path or has C<.> or C<..> in the path directories it is checked that the path exists and is a file. Otherwise, the file name in I<$file_path> is located in include directories also used to find texinfo files included in Texinfo documents. I<$file_path> should be a binary string. C<undef> is returned if the file was not found, otherwise the file found is returned as a binary string. =item ($index_entry, $index_info) = lookup_index_entry($index_entry_info, $indices_information) Returns an I<$index_entry> hash based on the I<$index_entry_info> and I<$indices_information>. Also returns the I<$index_info> hash with information on the index associated to the index entry. I<$index_entry_info> should be an array reference with an index name as first element and the index entry number in that index (1-based) as second element. In general, the I<$index_entry_info> is an L<C<extra> I<index_entry>|Texinfo::Parser/index_entry> associated to an element. The I<$index_entry> hash is described in L<Texinfo::Parser/index_entries>. The I<$index_info> hash is described in LL<< C<Texinfo::Parser::indices_information>|Texinfo::Parser/$indices_information = $parser->indices_information() >>. =item move_index_entries_after_items_in_tree($tree) X<C<move_index_entries_after_items_in_tree>> In C<@enumerate> and C<@itemize> from the tree, move index entries appearing just before C<@item> after the C<@item>. Comment lines between index entries are moved too. =item relate_index_entries_to_table_items_in_tree($tree) X<C<relate_index_entries_to_table_items_in_tree>> In tables, relate index entries preceding and following an entry with said item. Reference one of them in the entry's C<entry_associated_element>. =item $normalized_name = normalize_top_node_name($node_string) X<C<normalize_top_node_name>> Normalize the node name string given in argument, by normalizing Top node case. =item protect_colon_in_tree($tree) =item protect_node_after_label_in_tree($tree) X<C<protect_colon_in_tree>> X<C<protect_node_after_label_in_tree>> Protect colon with C<protect_colon_in_tree> and characters that are special in node names after a label in menu entries (tab dot and comma) with C<protect_node_after_label_in_tree>. The protection is achieved by putting protected characters in C<@asis{}>. =item protect_comma_in_tree($tree) X<C<protect_comma_in_tree>> Protect comma characters, replacing C<,> with @comma{} in tree. =item $contents_result = protect_first_parenthesis($contents) X<C<protect_first_parenthesis>> Return a contents array reference with first parenthesis in the contents array reference protected. If I<$contents> is undef a fatal error with a backtrace will be emitted. =item $level = section_level($section) X<C<section_level>> Return numbered level of the tree sectioning element I<$section>, as modified by raise/lowersections. =item $element = set_global_document_command($customization_information, $global_commands_information, $cmdname, $command_location) X<C<set_global_document_command>> Set the Texinfo customization variable corresponding to I<$cmdname> in I<$customization_information>. The I<$global_commands_information> should contain information about global commands in a Texinfo document, typically obtained from a parser L<< $parser->global_commands_information()|Texinfo::Parser/$commands = global_commands_information($parser) >>. I<$command_location> specifies where in the document the value should be taken from, for commands that may appear more than once. The possibilities are: =over =item last Set to the last value for the command. =item preamble Set sequentially to the values in the Texinfo preamble. =item preamble_or_first Set to the first value of the command if the first command is not in the Texinfo preamble, else set as with I<preamble>, sequentially to the values in the Texinfo preamble. =back The I<$element> returned is the last element that was used to set the customization value, or C<undef> if no customization value was found. Notice that the only effect of this function is to set a customization variable value, no @-command side effects are run, no associated customization variables are set. =item $status = set_informative_command_value($customization_information, $element) X<C<set_informative_command_value>> Set the Texinfo customization option corresponding to the tree element I<$element>. The command associated to the tree element should be a command that sets some information, such as C<@documentlanguage>, C<@contents> or C<@footnotestyle> for example. Return true if the command argument was found and the customization variable was set. =item set_output_encodings($customization_information, $parser_information) X<C<set_output_encodings>> If not already set, set C<OUTPUT_ENCODING_NAME> based on input file encoding. Also set C<OUTPUT_PERL_ENCODING> accordingly which is used to output in the correct encoding. In general, C<OUTPUT_PERL_ENCODING> should not be set directly by user-defined code such that it corresponds to C<OUTPUT_ENCODING_NAME>. =item $split_contents = split_custom_heading_command_contents($contents) X<C<split_custom_heading_command_contents>> Split the I<$contents> array reference at C<@|> in at max three parts. Return an array reference containing the split parts. The I<$contents> array reference is supposed to be C<< $element->{'args'}->[0]->{'contents'} >> of C<%Texinfo::Commands::heading_spec_commands> commands such as C<@everyheading>. =item trim_spaces_comment_from_content($contents) X<C<trim_spaces_comment_from_content>> Remove empty spaces after commands or braces at begin and spaces and comments at end from a content array, modifying it. =item $status = valid_customization_option($name) X<C<valid_option>> Return true if the I<$name> is a known customization option. =item $status = valid_tree_transformation($name) X<C<valid_tree_transformation>> Return true if the I<$name> is a known tree transformation name that may be passed with C<TREE_TRANSFORMATIONS> to modify a texinfo tree. =back =head1 SEE ALSO L<Texinfo::Parser>, L<Texinfo::Convert::Converter> and L<Texinfo::Report>. =head1 AUTHOR Patrice Dumas, E<lt>pertusus@free.frE<gt> =head1 COPYRIGHT AND LICENSE Copyright 2010- Free Software Foundation, Inc. See the source file for all copyright years. This library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. =cut