#! /usr/bin/env perl
# texi-elements-by-size -- dump list of elements based on words or line counts.
# Also serve as an example of using the Texinfo::Parser module,
# including the usual per-format options.
#
# Copyright 2012-2023 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# Original author: Patrice Dumas
use strict;
use Config; # to determine the path separator
use Getopt::Long qw(GetOptions);
Getopt::Long::Configure("gnu_getopt");
BEGIN {
  # The purpose of these includes is to make it possible to run the
  # script from a Texinfo source checkout.  If that's not relevant,
  # probably best to simply assume all the needed packages are in the
  # Perl include path.
  #
  # Directory we are in.  When $0 has no directory component (for example
  # the script was started as "perl texi-elements-by-size"), nothing is
  # stripped by the substitution; fall back to '.' so that tp is looked up
  # relative to the current directory instead of "under" the script name.
  my $mydir = $0;
  $mydir = '.' unless ($mydir =~ s,/[^/]*$,,);
  my $txi_libdir = "$mydir/../tp";  # find tp relative to $0
  unshift (@INC, $txi_libdir);

  # Module directories below $txi_libdir/maintain/lib, put in front of
  # $txi_libdir itself.
  my @txi_maint_dirs = qw(Text-Unidecode Unicode-EastAsianWidth libintl-perl);
  unshift (@INC, map { "$txi_libdir/maintain/lib/$_/lib" } @txi_maint_dirs );
}
use Texinfo::Parser;
use Texinfo::Convert::TextContent;
# Version printed by --version; embeds the version of the parser module.
my $my_version = '0.1 (TP ' . $Texinfo::Parser::VERSION . ')';

# Name used in messages: $0 without leading directories and without a
# trailing .pl.
my $real_command_name = $0;
$real_command_name =~ s{.*/}{};
$real_command_name =~ s{\.pl$}{};

# Separator between directories in -I and -P arguments, taken from the
# Perl configuration, with ':' as the fallback.  The quoted form is the
# one that is safe to use inside a regular expression.
my $path_separator
  = defined($Config{'path_sep'}) ? $Config{'path_sep'} : ':';
my $quoted_path_separator = quotemeta($path_separator);

# Command-line switches, all off by default.
my ($force, $use_sections, $count_words, $no_warn) = (0, 0, 0, 0);
# Translation hook.  There is no message catalog yet, so the message is
# handed back unchanged; it is a placeholder for future i18n.
sub __($) {
  my ($msgid) = @_;
  return $msgid;
}
# The counts are made from the Info output, so 'info' is the format whose
# conditional blocks are expanded unless the command line says otherwise.
my $format = 'info';

# Directories specified on the command line, filled in by the -I and -P
# option handlers.
my (@include_dirs, @prepend_dirs);

# Options every parser is created with.  EXPANDED_FORMATS starts with the
# output format; 'values' collects the variables set with -D.
# (No gettext hook is passed: __ is only a placeholder for now.)
my $parser_default_options = {
  'EXPANDED_FORMATS' => [ $format ],
  'values' => {},
};
# Turn the expansion of the conditional blocks of one format on or off.
#   $region: format name, for instance 'html' or 'tex'.
#   $set:    true to expand the format, false to stop expanding it;
#            undef is treated as true.
# The list that is edited is $parser_default_options->{'EXPANDED_FORMATS'},
# the key the defaults are stored under.  The previous code edited a
# separate lower-case 'expanded_formats' entry, so --no-ifinfo never
# removed the default format and --ifFORMAT additions ended up in a list
# distinct from the default one.
sub set_expansion($$) {
  my $region = shift;
  my $set = shift;
  $set = 1 if (!defined($set));

  # Create the list on demand so the function also works without defaults.
  my $expanded_formats
    = ($parser_default_options->{'EXPANDED_FORMATS'} ||= []);
  if ($set) {
    # Add the format only once.
    push @$expanded_formats, $region
      unless (grep {$_ eq $region} @$expanded_formats);
  } else {
    @$expanded_formats = grep {$_ ne $region} @$expanded_formats;
  }
}
# --number-sections/--no-number-sections is only recorded here.  The
# handler used to call set_from_cmdline(), a function this script does not
# define, so giving the option made option parsing fail.
# NOTE(review): nothing reads $number_sections yet; pass it on to the
# converter if section numbering is supposed to affect the output.
my $number_sections;

# Parse the command line.  Options with a handler act immediately (help
# and version exit); the others only fill in the variables declared above.
my $result_options = Getopt::Long::GetOptions (
 'help|h' => sub { print help(); exit 0; },
 'version|V' => sub {print "$real_command_name $my_version\n\n";
    printf __("Copyright (C) %s Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.\n"), "2016";
      exit 0;},
 'force' => \$force,
 'ifhtml!' => sub { set_expansion('html', $_[1]); },
 'ifinfo!' => sub { set_expansion('info', $_[1]); },
 'ifxml!' => sub { set_expansion('xml', $_[1]); },
 'ifdocbook!' => sub { set_expansion('docbook', $_[1]); },
 'iftex!' => sub { set_expansion('tex', $_[1]); },
 'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); },
 'use-sections!' => \$use_sections,
 'count-words!' => \$count_words,
 'no-warn' => \$no_warn,
 # -D sets the variable to 1, -U removes it again.
 'D=s' => sub {$parser_default_options->{'values'}->{$_[1]} = 1;},
 'U=s' => sub {delete $parser_default_options->{'values'}->{$_[1]};},
 # Both -I and -P accept several directories joined by the path separator.
 'I=s' => sub {
    push @include_dirs, split(/$quoted_path_separator/, $_[1]); },
 'P=s' => sub { unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); },
 'number-sections!' => \$number_sections,
);

# GetOptions has already reported what was wrong.
exit 1 if (!$result_options);

my @input_files = @ARGV;
# use STDIN if not a tty, like makeinfo does
@input_files = ('-') if (!scalar(@input_files) and !-t STDIN);
die sprintf(__("%s: missing file argument.\n"), $real_command_name)
   .sprintf(__("Try `%s --help' for more information.\n"), $real_command_name)
     unless (scalar(@input_files) >= 1);

if (scalar(@input_files) > 1) {
  # The file names go through %s: interpolated into the format itself, a
  # '%' in a file name would be taken as a conversion by sprintf.
  warn sprintf(__("%s: superfluous file arguments: %s\n"),
               $real_command_name, join(' ', @input_files));
}

# Only the first file is processed.
my $input_file_name = shift @input_files;
# Build the text printed by --help and return it as one string.
# The text is made of five sections (usage line, description, general
# options, input file options, conditional processing options) separated
# by an empty line.
sub help() {
  my @sections = (
    sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"), $real_command_name),
    __("Write to standard output a list of Texinfo elements (nodes or sections)
sorted by the number of lines (or words) they contain,
after translation to Info format.\n"),
    __("General Options:
--count-words count words instead of lines.
--force keep going even if Texinfo file parsing fails.
--help display this help and exit.
--no-warn suppress warnings (but not errors).
--use-sections use sections as elements instead of nodes.
--version display version information and exit.\n"),
    __("Input file options:
-D VAR define the variable VAR, as with \@set.
-I DIR append DIR to the \@include search path.
-P DIR prepend DIR to the \@include search path.
-U VAR undefine the variable VAR, as with \@clear.\n"),
    __("Conditional processing in input:
--ifdocbook process \@ifdocbook and \@docbook.
--ifhtml process \@ifhtml and \@html.
--ifinfo process \@ifinfo.
--ifplaintext process \@ifplaintext.
--iftex process \@iftex and \@tex.
--ifxml process \@ifxml and \@xml.
--no-ifdocbook do not process \@ifdocbook and \@docbook text.
--no-ifhtml do not process \@ifhtml and \@html text.
--no-ifinfo do not process \@ifinfo text.
--no-ifplaintext do not process \@ifplaintext text.
--no-iftex do not process \@iftex and \@tex text.
--no-ifxml do not process \@ifxml and \@xml text.
Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n"),
  );
  # Every section already ends with a newline, so joining with "\n"
  # leaves exactly one empty line between two sections.
  return join("\n", @sections);
}
# Leave the program with status 1 when errors were counted, unless
# --force was given.  Does nothing when $error_count is false.
sub _exit($) {
  my ($error_count) = @_;
  if ($error_count && !$force) {
    exit (1);
  }
}
# Print the messages held by $self and update the running error count.
#   $self:        object whose errors() method returns a reference to the
#                 list of messages and the number of errors among them.
#   $error_count: number of errors counted so far.
# Errors are always printed; other messages are left out with --no-warn.
# Exits through _exit() when there are errors and --force is not set,
# otherwise returns the updated error count.
sub handle_errors($$) {
  my ($self, $error_count) = @_;

  my ($errors, $new_error_count) = $self->errors();
  if ($new_error_count) {
    $error_count += $new_error_count;
  }

  for my $message (@$errors) {
    # With --no-warn only messages of type 'error' get through.
    next if ($no_warn and $message->{'type'} ne 'error');
    warn $message->{'error_line'};
  }

  _exit($error_count);
  return $error_count;
}
# Directory part of the input file name, trailing slash included, or '.'
# when the name contains no slash.
my $input_directory = ($input_file_name =~ m{(.*/)}) ? $1 : '.';

# Directories that always come before the -I directories: the current
# directory, then the directory of the input file if it is different.
my @prepended_include_directories = ('.');
if ($input_directory ne '.') {
  push @prepended_include_directories, $input_directory;
}

# Parser options: a copy of the defaults plus the include directories, in
# this order: -P directories, the directories above, -I directories.
my $parser_options = {
  %$parser_default_options,
  'INCLUDE_DIRECTORIES' => [ @prepend_dirs,
                             @prepended_include_directories,
                             @include_dirs ],
};

my $error_count = 0;
my $parser = Texinfo::Parser::parser($parser_options);
my $document = $parser->parse_texi_file($input_file_name);
my $registrar = $parser->registered_errors();

# Without a document there is nothing to count: show the registered
# messages and give up, whether or not --force was given.
unless (defined($document)) {
  handle_errors($registrar, $error_count);
  exit (1);
}

# The converter computes the element sizes and formats them; only the
# formatted text is used here.
my $converter
  = Texinfo::Convert::TextContent->converter({ 'document' => $document });
my ($sorted_name_counts_array, $formatted_result)
  = $converter->sort_element_counts($document, $use_sections, $count_words);

print STDOUT $formatted_result;

exit (0);