#! /usr/bin/env perl
# texi-elements-by-size -- dump list of elements based on words or line counts.
# Also serve as an example of using the Texinfo::Parser module,
# including the usual per-format options.
#
# Copyright 2012-2023 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.
#
# Original author: Patrice Dumas

use strict;
use warnings;

use Config; # to determine the path separator
use Getopt::Long qw(GetOptions);

Getopt::Long::Configure("gnu_getopt");

BEGIN
{
  # The purpose of these includes is to make it possible to run the
  # script from a Texinfo source checkout.  If that's not relevant,
  # probably best to simply assume all the needed packages are in the
  # Perl include path.
  #
  (my $mydir = $0) =~ s,/[^/]*$,,;  # dir we are in
  my $txi_libdir = "$mydir/../tp";  # find tp relative to $0
  unshift (@INC, $txi_libdir);
  #
  my @txi_maint_dirs = qw(Text-Unidecode Unicode-EastAsianWidth libintl-perl);
  unshift (@INC, map { "$txi_libdir/maintain/lib/$_/lib" } @txi_maint_dirs );
}

use Texinfo::Parser;
use Texinfo::Structuring;
use Texinfo::Convert::TextContent;

my $my_version = "0.1 (TP $Texinfo::Parser::VERSION)";

# Program name used in messages: basename of $0 without a trailing ".pl".
(my $real_command_name = $0) =~ s/.*\///;
$real_command_name =~ s/\.pl$//;

# determine the path separators
my $path_separator = $Config{'path_sep'};
$path_separator = ':' if (!defined($path_separator));
my $quoted_path_separator = quotemeta($path_separator);

# Flags filled in by the command-line options below.
my $force = 0;
my $use_sections = 0;
my $count_words = 0;
my $no_warn = 0;

# placeholder for future i18n.
sub __($) {
  return $_[0];
}

my $format = 'info'; # make our counts from the Info output

# this is the format associated with the output format, which is replaced
# when the output format changes.  It may also be removed if there is the
# corresponding --no-ifformat.
#my $default_expanded_format = [ $format ];

# directories specified on the command line.
my @include_dirs = ();
my @prepend_dirs = ();

my $parser_default_options = {
  #'expanded_formats' => [],
  'expanded_formats' => [ $format ],
  'values' => {},
  #'gettext' => \&__
};

# Customization variables given on the command line (currently only
# NUMBER_SECTIONS).  They are forwarded to the converter options below.
my %cmdline_options = ();

# set_from_cmdline(NAME, VALUE)
# Record the customization variable NAME with VALUE, as set by a
# command-line option.
sub set_from_cmdline($$) {
  my $name = shift;
  my $value = shift;

  $cmdline_options{$name} = $value;
  return 1;
}

# set_expansion(REGION, SET)
# Add REGION to (SET true or undefined) or remove it from (SET false) the
# list of expanded formats handed to the parser.  Adding is idempotent:
# a REGION already present is not duplicated.
sub set_expansion($$) {
  my $region = shift;
  my $set = shift;

  $set = 1 if (!defined($set));
  my $expanded = $parser_default_options->{'expanded_formats'};
  if ($set) {
    push @$expanded, $region
      unless (grep {$_ eq $region} @$expanded);
  } else {
    # modify the array in place so that the reference stays valid
    @$expanded = grep {$_ ne $region} @$expanded;
    # @{$default_expanded_format}
    #   = grep {$_ ne $region} @{$default_expanded_format};
  }
}

# help()
# Return the usage message as one string.  Defined before the option
# parsing so that its prototype is known where it is called.
sub help() {
  my $help = sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"),
                     $real_command_name)
."\n".
__("Write to standard output a list of Texinfo elements (nodes or sections)
sorted by the number of lines (or words) they contain,
after translation to Info format.\n")
."\n";
  $help .= __("General Options:
  --count-words    count words instead of lines.
  --force          keep going even if Texinfo file parsing fails.
  --help           display this help and exit.
  --no-warn        suppress warnings (but not errors).
  --use-sections   use sections as elements instead of nodes.
  --version        display version information and exit.\n")
."\n";
  $help .= __("Input file options:
  -D VAR           define the variable VAR, as with \@set.
  -I DIR           append DIR to the \@include search path.
  -P DIR           prepend DIR to the \@include search path.
  -U VAR           undefine the variable VAR, as with \@clear.\n")
."\n";
  $help .= __("Conditional processing in input:
  --ifdocbook       process \@ifdocbook and \@docbook.
  --ifhtml          process \@ifhtml and \@html.
  --ifinfo          process \@ifinfo.
  --ifplaintext     process \@ifplaintext.
  --iftex           process \@iftex and \@tex.
  --ifxml           process \@ifxml and \@xml.
  --no-ifdocbook    do not process \@ifdocbook and \@docbook text.
  --no-ifhtml       do not process \@ifhtml and \@html text.
  --no-ifinfo       do not process \@ifinfo text.
  --no-ifplaintext  do not process \@ifplaintext text.
  --no-iftex        do not process \@iftex and \@tex text.
  --no-ifxml        do not process \@ifxml and \@xml text.

  Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n");
  return $help;
}

# _exit(ERROR_COUNT)
# Exit with status 1 if ERROR_COUNT is true, unless --force was given.
# Otherwise return to the caller.
sub _exit($) {
  my $error_count = shift;

  exit (1) if ($error_count and !$force);
}

# handle_errors(SELF, ERROR_COUNT)
# Fetch the messages from SELF through its errors() method, print them on
# standard error (warnings are omitted with --no-warn, errors are always
# shown), add the number of new errors to ERROR_COUNT and leave the program
# through _exit() if appropriate.  Return the updated count.
sub handle_errors($$) {
  my $self = shift;
  my $error_count = shift;

  my ($errors, $new_error_count) = $self->errors();
  $error_count += $new_error_count if ($new_error_count);
  foreach my $error_message (@$errors) {
    my $type = $error_message->{'type'};
    # guard against a missing type to avoid an uninitialized value warning
    my $is_error = (defined($type) and $type eq 'error');
    warn $error_message->{'error_line'} if ($is_error or !$no_warn);
  }

  _exit($error_count);
  return $error_count;
}

my $result_options = Getopt::Long::GetOptions (
 'help|h' => sub { print help(); exit 0; },
 'version|V' => sub {print "$real_command_name $my_version\n\n";
    printf __("Copyright (C) %s Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.\n"), "2016";
      exit 0;},
 'force' => \$force,
 'ifhtml!' => sub { set_expansion('html', $_[1]); },
 'ifinfo!' => sub { set_expansion('info', $_[1]); },
 'ifxml!' => sub { set_expansion('xml', $_[1]); },
 'ifdocbook!' => sub { set_expansion('docbook', $_[1]); },
 'iftex!' => sub { set_expansion('tex', $_[1]); },
 'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); },
 'use-sections!' => \$use_sections,
 'count-words!' => \$count_words,
 'no-warn' => \$no_warn,
 'D=s' => sub {$parser_default_options->{'values'}->{$_[1]} = 1;},
 'U=s' => sub {delete $parser_default_options->{'values'}->{$_[1]};},
 # -I and -P accept several directories joined by the path separator
 'I=s' => sub {
      push @include_dirs, split(/$quoted_path_separator/, $_[1]); },
 'P=s' => sub {
      unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); },
 'number-sections!' => sub { set_from_cmdline('NUMBER_SECTIONS', $_[1]); },
);

exit 1 if (!$result_options);

my @input_files = @ARGV;
# use STDIN if not a tty, like makeinfo does
@input_files = ('-') if (!scalar(@input_files) and !-t STDIN);
die sprintf(__("%s: missing file argument.\n"), $real_command_name)
   .sprintf(__("Try `%s --help' for more information.\n"), $real_command_name)
     unless (scalar(@input_files) >= 1);

if (scalar(@input_files) > 1) {
  # The file names are passed as an argument and not interpolated in the
  # format, such that a '%' in a file name cannot be taken as a conversion.
  warn sprintf(__("%s: superfluous file arguments: %s\n"),
               $real_command_name, join(' ', @input_files));
}

# Only the first file is processed.
my $input_file_name = shift @input_files;

# Directory part of the input file name (with the trailing slash), also
# searched for included files.
my $input_directory = '.';
if ($input_file_name =~ /(.*\/)/) {
  $input_directory = $1;
}

# Shallow copy of the defaults; the include path is built below.
my $parser_options = { %$parser_default_options };

# Resulting search order: -P directories, '.', the input file directory,
# then -I directories.
$parser_options->{'include_directories'} = [@include_dirs];
my @prepended_include_directories = ('.');
push @prepended_include_directories, $input_directory
    if ($input_directory ne '.');
unshift @{$parser_options->{'include_directories'}},
        @prepended_include_directories;
unshift @{$parser_options->{'include_directories'}}, @prepend_dirs;

my $error_count = 0;

my $parser = Texinfo::Parser::parser($parser_options);
my $tree = $parser->parse_texi_file($input_file_name);
my $registrar = $parser->registered_errors();

# NOTE(review): messages are only shown when no tree is returned; errors and
# warnings registered during a parse that did return a tree are not reported
# here -- confirm whether this is intended.
if (!defined($tree)) {
  handle_errors($registrar, $error_count);
  exit (1);
}

# Customization set on the command line is handed to the converter.
# NOTE(review): presumably the converter accepts NUMBER_SECTIONS as
# customization -- confirm against Texinfo::Convert::Converter.
my $converter_options = { %cmdline_options };
$converter_options->{'parser'} = $parser;
my $converter = Texinfo::Convert::TextContent->converter($converter_options);

my ($sorted_name_counts_array, $formatted_result)
    = $converter->sort_element_counts($tree, $use_sections, $count_words);

print STDOUT $formatted_result;

exit (0);