# -*-perl-*-
#+##############################################################################
#
# tex4ht.pm: use tex4ht to convert tex to html
#
#    Copyright 2005, 2007, 2009, 2011-2023 Free Software Foundation, Inc.
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 3 of the License,
#    or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Originally written by Patrice Dumas.
#
#-##############################################################################

# To customize the command and the options, you can set the
# T4H_MATH_CONVERSION, T4H_TEX_CONVERSION and T4H_LATEX_CONVERSION
# customization variables and/or change
# $Texinfo::TeX4HT::tex4ht_command_tex
# and $Texinfo::TeX4HT::tex4ht_options_tex
# $Texinfo::TeX4HT::tex4ht_command_latex, $Texinfo::TeX4HT::tex4ht_command_texi
# and $Texinfo::TeX4HT::tex4ht_options_latex and
# $Texinfo::TeX4HT::tex4ht_options_texi

use strict;

# To check if there is no erroneous autovivification
#no autovivification qw(fetch delete exists store strict);

use Cwd;
use Encode qw(encode);
# File::Spec->curdir() and File::Spec->catfile() are called below
use File::Spec;

# Also for __(
use Texinfo::Common;
use Texinfo::Convert::Texinfo;
# to implement CONVERT_TO_LATEX_IN_MATH
use Texinfo::Convert::LaTeX;

texinfo_register_handler('structure', \&tex4ht_prepare);
# could probably be done also in the 'structure' phase
texinfo_register_handler('init', \&tex4ht_process);
texinfo_register_handler('finish', \&tex4ht_finish);

texinfo_register_command_formatting('math', \&tex4ht_convert_command);
texinfo_register_command_formatting('tex', \&tex4ht_convert_command);
texinfo_register_command_formatting('latex', \&tex4ht_convert_command);
texinfo_register_command_formatting('displaymath', \&tex4ht_convert_command);

{
  package Texinfo::TeX4HT;

  use vars qw(
    $tex4ht_command_tex
    $tex4ht_command_latex
    $tex4ht_command_texi
    $tex4ht_options_tex
    $tex4ht_options_latex
    $tex4ht_options_texi
  );

  # Defaults are only applied when the variable was not already set.
  if (!defined($tex4ht_command_tex)) {
    $tex4ht_command_tex = 'httex';
  }
  if (!defined($tex4ht_command_latex)) {
    $tex4ht_command_latex = 'htlatex';
  }
  if (!defined($tex4ht_command_texi)) {
    $tex4ht_command_texi = 'httexi';
  }
}

# Per @-command information: conversion style, number of collected
# elements ('counter'), HTML results keyed by element and number of
# results output ('output_counter').
my %commands;
# Per conversion format ('tex', 'latex', 'texi') information: executable,
# file names, collected elements and counters.
my %formats;

my $tex4ht_initial_dir;
my $tex4ht_out_dir;

# Handler registered for the 'structure' stage.
# Resets the file-scoped state, collects the @math, @displaymath, @tex and
# @latex elements found in DOCUMENT_ROOT and writes, for each conversion
# format that has elements, one input file for tex4ht.  Every fragment is
# surrounded by HTML comments "tex4ht_begin"/"tex4ht_end" carrying the
# basename, the command name and the fragment number, which are searched
# for by tex4ht_process_format() in the HTML produced by tex4ht.
#
# Returns 0 on success (including when the output is a null device file)
# and 1 if an input file could not be opened for writing.
sub tex4ht_prepare($$)
{
  # set file names
  my $self = shift;
  my $document_root = shift;

  # this initialization may not be needed, but it is cleaner anyway,
  # in case there is more than one texinfo file processed.
  # In that case, it is indeed better to clear the structures.  Note that
  # even if the structures are not cleared, the results should be ok in most
  # cases, as everything that needs to be changed should be rewritten, so it
  # does not matter if there are remaining results from the previous file.
  %commands = ();
  %formats = ();

  return 0 if (defined($self->get_conf('OUTFILE'))
        and $Texinfo::Common::null_device_file{$self->get_conf('OUTFILE')});

  $formats{'tex'} = {'exec' => $Texinfo::TeX4HT::tex4ht_command_tex,
                     'commands' => [], 'results' => {}};
  $formats{'latex'} = {'exec' => $Texinfo::TeX4HT::tex4ht_command_latex,
                       'commands' => [], 'results' => {}};
  $formats{'texi'} = {'exec' => $Texinfo::TeX4HT::tex4ht_command_texi,
                      'commands' => [], 'results' => {}};

  my $math_conversion = $self->get_conf('T4H_MATH_CONVERSION');
  if (defined($math_conversion) and !$formats{$math_conversion}) {
    $self->document_error($self,
      sprintf(__("tex4ht.pm: unknown conversion type for math: %s"),
              $math_conversion));
    $math_conversion = undef;
  }
  $math_conversion = 'tex' if (!defined($math_conversion));

  my $tex_conversion = $self->get_conf('T4H_TEX_CONVERSION');
  if (defined($tex_conversion) and !$formats{$tex_conversion}) {
    $self->document_error($self,
      sprintf(__("tex4ht.pm: unknown conversion type for \@tex: %s"),
              $tex_conversion));
    $tex_conversion = undef;
  }
  $tex_conversion = 'tex' if (!defined($tex_conversion));

  my $latex_conversion = $self->get_conf('T4H_LATEX_CONVERSION');
  if (defined($latex_conversion) and !$formats{$latex_conversion}) {
    $self->document_error($self,
      sprintf(__("tex4ht.pm: unknown conversion type for \@latex: %s"),
              $latex_conversion));
    $latex_conversion = undef;
  }
  $latex_conversion = 'latex' if (!defined($latex_conversion));

  $commands{'displaymath'} = {'style' => $math_conversion, 'results' => {}};
  $commands{'math'} = {'style' => $math_conversion, 'results' => {}};
  $commands{'latex'} = {'style' => $latex_conversion, 'results' => {}};
  $commands{'tex'} = {'style' => $tex_conversion, 'results' => {}};

  $tex4ht_initial_dir = Cwd::abs_path;
  $tex4ht_out_dir = $self->get_info('destination_directory');
  $tex4ht_out_dir = File::Spec->curdir()
    if (!defined($tex4ht_out_dir) or $tex4ht_out_dir =~ /^\s*$/);

  my $document_name = $self->get_info('document_name');
  my $tex4ht_basename = "${document_name}_tex4ht";

  my @replaced_commands = sort(keys(%commands));
  my $collected_commands = Texinfo::Common::collect_commands_list_in_tree(
                                        $document_root, \@replaced_commands);

  # group the collected elements by conversion format
  my %format_collected_commands = ();
  foreach my $element (@{$collected_commands}) {
    my $command = $element->{'cmdname'};
    my $format = $commands{$command}->{'style'};
    push @{$format_collected_commands{$format}}, $element;
    $commands{$command}->{'counter'}++;
  }

  # prepare tex4ht input files for each format
  foreach my $format (sort(keys(%format_collected_commands))) {
    $formats{$format}->{'basename'} = $tex4ht_basename . "_$format";
    my $suffix = '.tex';
    $suffix = '.texi' if ($format eq 'texi');
    $formats{$format}->{'basefile_name'}
       = $formats{$format}->{'basename'} . $suffix;
    my ($encoded_basefile_name, $basefile_name_encoding)
      = $self->encoded_output_file_name($formats{$format}->{'basefile_name'});
    $formats{$format}->{'basefile_path'} = $encoded_basefile_name;

    $formats{$format}->{'html_basefile_name'}
       = $formats{$format}->{'basename'} . '.html';
    my ($encoded_html_basefile_name, $html_basefile_name_encoding)
      = $self->encoded_output_file_name(
                               $formats{$format}->{'html_basefile_name'});
    $formats{$format}->{'html_basefile_path'} = $encoded_html_basefile_name;

    my $tex4ht_file_path_name
      = File::Spec->catfile($tex4ht_out_dir,
                            $formats{$format}->{'basefile_name'});
    my ($encoded_tex4ht_file_path_name, $tex4ht_path_encoding)
      = $self->encoded_output_file_name($tex4ht_file_path_name);

    $formats{$format}->{'counter'} = 0;
    $formats{$format}->{'output_counter'} = 0;

    # FIXME no clear way to use utf8 and support encoded characters.
    # An attempt to encode to utf8 led to worse results than letting
    # perl encode to latin1.  But with other non ascii characters than
    # latin1 characters, there will be utf8 output anyway.

    my $options_latex_math;
    if ($self->get_conf('CONVERT_TO_LATEX_IN_MATH')) {
      $options_latex_math
       = {Texinfo::Convert::LaTeX::copy_options_for_convert_to_latex_math(
                                                                     $self)};
    }

    if (scalar(@{$format_collected_commands{$format}}) > 0) {
      # opened lazily, when the first non empty fragment is found
      my $fh;
      foreach my $element (@{$format_collected_commands{$format}}) {
        my $cmdname = $element->{'cmdname'};
        my $tree;
        if ($cmdname eq 'math') {
          $tree = $element->{'args'}->[0];
        } elsif ($element->{'contents'}) {
          # work on a copy of the contents list, the element is not modified
          $tree = {'contents' => [@{$element->{'contents'}}]};
          if (scalar(@{$tree->{'contents'}})
              and $tree->{'contents'}->[0]->{'type'}
              and ($tree->{'contents'}->[0]->{'type'}
                                          eq 'empty_line_after_command'
                   or $tree->{'contents'}->[0]->{'type'}
                                          eq 'elided_brace_command_arg'
                   or $tree->{'contents'}->[0]->{'type'}
                                          eq 'elided_rawpreformatted')) {
            shift @{$tree->{'contents'}};
          }
          # Check that the list is not empty before using index -1,
          # dereferencing the last element of an empty array is fatal.
          if (scalar(@{$tree->{'contents'}})
              and $tree->{'contents'}->[-1]->{'cmdname'}
              and $tree->{'contents'}->[-1]->{'cmdname'} eq 'end') {
            pop @{$tree->{'contents'}};
          }
        } else {
          next;
        }
        if (!$tree or !$tree->{'contents'}
            or scalar(@{$tree->{'contents'}}) == 0) {
          # should correspond to an ignored block
          next;
        }

        if ($formats{$format}->{'counter'} == 0) {
          # first fragment for the format: create the file and output
          # the preamble
          unless (open($fh, '>', $encoded_tex4ht_file_path_name)) {
            $self->document_error($self,
                   sprintf(__("tex4ht.pm: could not open %s: %s"),
                                          $tex4ht_file_path_name, $!));
            return 1;
          }

          my $comment = '@c';
          $comment = '%' if ($format ne 'texi');
          $comment
      .= " Automatically generated by the t4ht Texinfo HTML extension\n";
          if ($format eq 'texi') {
            print $fh "\\input texinfo \@setfilename $formats{$format}->{'basename'}.info\n";
            print $fh "$comment";
          } else {
            print $fh "$comment";
            if ($format eq 'latex') {
              print $fh "\\documentclass{article}\n\\begin{document}\n";
            } elsif ($format eq 'tex') {
              print $fh "\\csname tex4ht\\endcsname\n";
            }
          }
        }

        $formats{$format}->{'counter'}++;
        my $counter = $formats{$format}->{'counter'};

        my $text;
        if ($options_latex_math) {
          $text = Texinfo::Convert::LaTeX::convert_to_latex_math(undef, $tree,
                                                         $options_latex_math);
        } else {
          $text = Texinfo::Convert::Texinfo::convert_to_texinfo($tree);
        }

        # fragment number N is associated to the element at index N-1
        $formats{$format}->{'commands'}->[$counter-1] = $element;

        # write to tex file
        my ($before_comment_open, $after_comment_open, $before_comment_close,
            $after_comment_close);
        if ($format eq 'texi') {
          $before_comment_open = "\@verbatim\n\n";
          $after_comment_open = "\n\@end verbatim\n";
          $before_comment_close = "\@verbatim\n";
          $after_comment_close = "\n\n\@end verbatim\n";
        } else {
          $before_comment_open = "\\HCode{\\Hnewline \\Hnewline ";
          $after_comment_open = "\\Hnewline}\n";
          $before_comment_close = "\\HCode{\\Hnewline ";
          $after_comment_close = "\\Hnewline \\Hnewline}\n";
        }

        # The begin and end markers are matched in tex4ht_process_format()
        # to find the HTML corresponding to this fragment.
        my $begin_comment
  = "<!-- tex4ht_begin $formats{$format}->{'basename'} $cmdname $counter -->";
        print $fh "$before_comment_open$begin_comment$after_comment_open";
        if ($cmdname eq 'tex' or $cmdname eq 'latex') {
          print $fh $text;
        } elsif ($cmdname eq 'math') {
          if ($format eq 'texi') {
            print $fh '@math{' . $text . "}\n";
          } else {
            print $fh "\\IgnorePar \$" . $text . "\$";
          }
        } elsif ($cmdname eq 'displaymath') {
          if ($format eq 'texi') {
            print $fh "\@displaymath\n".$text."\@end displaymath\n";
          } elsif ($format eq 'latex') {
            print $fh "\n\\[" . $text . "\\]\n";
          } else { # tex
            print $fh "\n\$\$" . $text . "\$\$\n";
          }
        }
        my $end_comment
  = "<!-- tex4ht_end $formats{$format}->{'basename'} $cmdname $counter -->";
        print $fh "$before_comment_close$end_comment$after_comment_close";
      }

      if ($formats{$format}->{'counter'} > 0) {
        # finish the tex file
        if ($format eq 'latex') {
          print $fh "\\end{document}\n";
        } elsif ($format eq 'tex') {
          print $fh "\n\\bye\n";
        } else {
          print $fh "\n\@bye\n";
        }
        # Buffered write errors show up at close.  Only warn, tex4ht is
        # still run on the file.
        unless (close($fh)) {
          $self->document_warn($self,
                 sprintf(__("tex4ht.pm: error on closing %s: %s"),
                                        $tex4ht_file_path_name, $!));
        }
      }
    }
  }
  return 0;
}

# Handler registered for the 'init' stage.
# run tex4ht and extract HTML for all the formats.
# Changes directory to the output directory determined in tex4ht_prepare(),
# calls tex4ht_process_format() for each format and changes back to the
# initial directory.
#
# Returns 0 if nothing was collected, 1 if the output directory cannot be
# entered, and otherwise the number of formats for which
# tex4ht_process_format() reported an error, plus one if returning to the
# initial directory failed.
sub tex4ht_process($)
{
  my $self = shift;

  # check first that there are collected commands before attempting chdir.
  # Alternatively, could have checked if scalar(keys(%commands)) is 0.
  my $total_count = 0;
  foreach my $format (keys(%formats)) {
    $total_count += $formats{$format}->{'counter'}
      if ($formats{$format}->{'counter'});
  }
  return 0 if ($total_count == 0);

  my ($encoded_tex4ht_out_dir, $tex4ht_out_dir_encoding)
    = $self->encoded_output_file_name($tex4ht_out_dir);
  unless (chdir $encoded_tex4ht_out_dir) {
    $self->document_warn($self,
            sprintf(__("tex4ht.pm: chdir %s failed: %s"),
                         $tex4ht_out_dir, $!));
    return 1;
  }
  print STDERR "cwd($encoded_tex4ht_out_dir): " . Cwd::cwd() ."\n"
    if ($self->get_conf('VERBOSE'));

  my $errors = 0;
  # sort for reproducible messages order and output
  foreach my $format (sort(keys(%formats))) {
    $errors += tex4ht_process_format($self, $format);
  }

  unless (chdir $tex4ht_initial_dir) {
    $self->document_warn($self, sprintf(__(
          "tex4ht.pm: unable to return to initial directory: %s"), $!));
    return 1 + $errors;
  }
  return $errors;
}

# run tex4ht and extract HTML fragments from the result file.
# Runs the executable associated to FORMAT on the input file written by
# tex4ht_prepare(), registers the CSS file import and reads the resulting
# HTML file, storing the text found between the "tex4ht_begin" and
# "tex4ht_end" markers in $commands{CMDNAME}->{'results'}, keyed by the
# corresponding element.
#
# Returns 0 if there is nothing to do for FORMAT or if the HTML file was
# read, 1 if the command or the opening of the HTML file failed.
sub tex4ht_process_format($$) {
  my $self = shift;
  my $format = shift;

  return 0 unless ($formats{$format}->{'counter'});

  # the file checked here is the tex4ht input file
  $self->document_warn($self,
              sprintf(__("tex4ht.pm: output file missing: %s"),
                               $formats{$format}->{'basefile_name'}))
    unless (-f $formats{$format}->{'basefile_path'});

  # run tex4ht
  my $options = '';
  if ($format eq 'tex' and defined($Texinfo::TeX4HT::tex4ht_options_tex)) {
    $options = $Texinfo::TeX4HT::tex4ht_options_tex;
  } elsif ($format eq 'latex'
           and defined($Texinfo::TeX4HT::tex4ht_options_latex)) {
    $options = $Texinfo::TeX4HT::tex4ht_options_latex;
  } elsif ($format eq 'texi'
           and defined($Texinfo::TeX4HT::tex4ht_options_texi)) {
    $options = $Texinfo::TeX4HT::tex4ht_options_texi;
  }

  # unencoded command, used in messages only
  my $cmd = "$formats{$format}->{'exec'} $formats{$format}->{'basefile_name'} $options";
  my $encoding = $self->get_conf('MESSAGE_ENCODING');
  my $encoded_exec;
  my $encoded_options;
  if (defined($encoding)) {
    $encoded_exec = encode($encoding, $formats{$format}->{'exec'});
    $encoded_options = encode($encoding, $options);
  } else {
    $encoded_exec = $formats{$format}->{'exec'};
    $encoded_options = $options;
  }
  my $encoded_cmd = $encoded_exec . " "
      . $formats{$format}->{'basefile_path'} . " $encoded_options";

  print STDERR "tex4ht command: $encoded_cmd\n"
    if ($self->get_conf('VERBOSE'));
  # do not use system in order to be sure that tex STDIN is not
  # mixed up with the main script STDIN.  It is important because
  # if tex fails, it will read from STDIN and the input may trigger
  # diverse actions by tex.
  # NOTE(review): the command is passed as one string, so it may be
  # interpreted by the shell, and the file name, which derives from the
  # document name, is not quoted.  The list form of open cannot be used
  # while the options are given as one string.
  my $command_fh;
  if (not(open($command_fh, "|-", $encoded_cmd))) {
    $self->document_error($self, sprintf(__(
                         "tex4ht.pm: command failed: %s"), $cmd));
    return 1;
  }
  # nothing is written to the command, closing waits for it to finish
  if (!close($command_fh)) {
    $self->document_warn($self, sprintf(__(
                "tex4ht.pm: closing communication failed: %s: %s"),
                                  $cmd, $!));
    return 1;
  }

  $self->css_add_info('imports',
                "\@import \"$formats{$format}->{'basename'}.css\";\n");

  # extract the html from the file created by tex4ht
  my $html_basefile = $formats{$format}->{'html_basefile_name'};
  my $encoded_html_basefile = $formats{$format}->{'html_basefile_path'};
  my $html_fh;
  unless (open($html_fh, '<', $encoded_html_basefile)) {
    $self->document_warn($self,
              sprintf(__("tex4ht.pm: could not open %s: %s"),
                                  $html_basefile, $!));
    return 1;
  }

  # the basename derives from the document name, it is quoted such that
  # it is matched literally in the regular expressions
  my $quoted_basename = quotemeta($formats{$format}->{'basename'});

  my $got_count = 0;
  my $line;
  # FIXME decode?
  while ($line = <$html_fh>) {
    #print STDERR "$encoded_html_basefile: while $line";
    if ($line =~ /!-- tex4ht_begin $quoted_basename (\w+) (\d+) --/) {
      my $cmdname = $1;
      my $count = $2;
      my $text = '';
      my $end_found = 0;
      # accumulate the lines up to the matching end marker
      while ($line = <$html_fh>) {
        #print STDERR "while search $cmdname $count $line";
        if ($line =~ /!-- tex4ht_end $quoted_basename $cmdname $count --/) {
          $got_count++;
          if ($cmdname eq 'math') {
            chomp($text);
            # tex4ht may add spaces that we do not need.  Actual content
            # should in any case be in HTML elements.
            $text =~ s/^\s*//;
            $text =~ s/\s*$//;
          }
          # the key is the stringified element reference
          $commands{$cmdname}->{'results'}->{
                $formats{$format}->{'commands'}->[$count-1]} = $text;
          $end_found = 1;
          last;
        } else {
          $text .= $line;
        }
      }
      unless ($end_found) {
        # should be a bug or mangled output
        $self->document_warn($self, sprintf(__(
                "tex4ht.pm: end of %s item %d for \@%s not found"),
                                     $format, $count, $cmdname));
      }
    }
  }
  if ($got_count != $formats{$format}->{'counter'}) {
    # unless tex4ht somehow mangles the output, this should
    # never happen, could also be considered as an error or a bug.
    $self->document_warn($self, sprintf(__(
  "tex4ht.pm: processing produced %d items in HTML; expected %d for format %s"),
                    $got_count, $formats{$format}->{'counter'}, $format));
  }
  close($html_fh);
  return 0;
}

# Formatting function registered for @math, @tex, @latex and @displaymath.
# Returns the HTML retrieved by tex4ht_process_format() for the element
# COMMAND if there is one.  If nothing is known about CMDNAME the default
# conversion for the command is called.  If there is no result for the
# element, an empty string is returned, with a warning if the format
# CMDNAME is expanded.
sub tex4ht_convert_command($$$;$$)
{
  my $self = shift;
  my $cmdname = shift;
  my $command = shift;
  my $args = shift;
  my $content = shift;

  if (not defined ($commands{$cmdname})) {
    # nothing was collected, do the usual output
    return $self->default_command_conversion($cmdname)->($self,
                                     $cmdname, $command, $args, $content);
  } elsif (not defined ($commands{$cmdname}->{'results'})) {
    # no results at all for that command, also do the usual output
    warn "tex4ht.pm: no results at all for \@$cmdname (element $command)\n"
      if ($self->get_conf('VERBOSE'));
    return $self->default_command_conversion($cmdname)->($self,
                                     $cmdname, $command, $args, $content);
  } elsif (exists($commands{$cmdname}->{'results'}->{$command})
           and defined($commands{$cmdname}->{'results'}->{$command})) {
    # return the resulting html
    $commands{$cmdname}->{'output_counter'}++;
    return $commands{$cmdname}->{'results'}->{$command};
  } else {
    # probably a bug in that case, unless the format is ignored
    if ($self->is_format_expanded($cmdname)) {
      $self->document_warn($self, sprintf(__(
                         "tex4ht.pm: output has no HTML item for \@%s %s"),
                                          $cmdname, $command));
    }
    return '';
  }
}

# Handler registered for the 'finish' stage.
# If VERBOSE is set, reports for each command the differences between the
# number of elements collected in the document and the number of HTML
# fragments output by tex4ht_convert_command().  Always returns 0.
sub tex4ht_finish($)
{
  my $self = shift;

  # this is different from the warning in tex4ht_process_format as, here,
  # this is the number of retrieved fragment, not processed fragment.
  if ($self->get_conf('VERBOSE')) {
    foreach my $command (sort(keys(%commands))) {
      if (not defined($commands{$command}->{'output_counter'})) {
        if (defined($commands{$command}->{'counter'})) {
          $self->document_warn($self, sprintf(__(
             "tex4ht.pm: output counter UNDEF; expected %d for \@%s"),
                           $commands{$command}->{'counter'}, $command));
        } else {
          warn sprintf(__(
             "tex4ht.pm: no expected items in the document for \@%s"),
                                                         $command)."\n";
        }
      } elsif ($commands{$command}->{'output_counter'}
               != $commands{$command}->{'counter'}) {
        # NOTE with math commands in @copying and multiple @insertcopying,
        # there may be more items output than found in the document tree
        warn sprintf(__(
   "tex4ht.pm: processing retrieved %d items in HTML; collected %d for \@%s"),
                           $commands{$command}->{'output_counter'},
                           $commands{$command}->{'counter'}, $command)."\n";
      }
    }
  }
  return 0;
}

1;