#!/usr/bin/env perl # This file is part of the authorindex package for LaTeX + BibTeX. # Version: 10. August 2008 use warnings 'all'; use strict; # configuration: apart from the command to invoke perl above you might want to # change: my $bstenv="BSTINPUTS"; # Environment Variable holding .bst search path my $bibenv="BIBINPUTS"; # Env Variable holding search path for BibTeX databases my $tmp="_autidx_"; # Name base for temporary files my $cwdpath="."; # Directory where files are generated my $pathsep=($^O eq "MSWin32") ? ';' : ":"; # Seperator between paths in a list # This script takes LaTeX .aux files as input. It extracts all citations made # with page number information. These, together with the bibliography data base # extracted from the input and a .bst file are processed by bibtex to get a # file that associates each citation label with corresponding authors. Together # with the knowledge of which work is cited on which page, this is used to # compute which author is cited on which page. This information is written to # the output in form suitable to be included in a LaTeX document. # Alternatively, the script can also extract the label that appears in the # references for each work instead of the pages. Also, output for further # processing by makeindex can be generated instead of a 'ready' LaTeX file. # examine command line for options my %opt; # hash to contain options found use Getopt::Std; getopts('dhikpr',\%opt); # handle -h option: a short description of the script. exists $opt{'h'} && die < hash marking pages with work author is 1st my %printname; # sort-key -> printed representation of author my %plainname; # sort-key -> author name my %PageTypeOrder; # Page type code -> number giving relative order my %PageOrder; # page string -> array used to sort pages my $see=""; # string to separate other and first author, or undef. my $bst=""; # name of BibTeX program to extract author names my $output=""; # name of file to which author index is written my $twoabbrev=""; # string to append to page for 2 subsequent pages my $also=""; # string to cross-refer to first authors my $alsosep=""; # seperator for referenced first authors my $useaibibcite; # flag: watch for \aibibcite, but not for \bibcite. # scan input files and # - build the file later to be processed by BibTeX, # - generate a temporary bibtex database of the explicit author names given, # - assemble for each citation the page where it was referenced and # - look for data base specification, output file name, and so on. open(AUXFILE,">$tmp.aux") || die "Can't open temporary file $tmp.aux\n"; open(BIBFILE,">$tmp.bib") || die "Can't open temporary file $tmp.bib\n"; while(<>){ if(/^\\citationpage\{\s*([^{ ]+)\s*\}\{(.+)\}$/){ $citationcount++; # used for statistics only. $workscount++ unless ($1 eq '*') or (exists $Lab2Pag{$1}) or (exists $Lab2Bib{$1}) or (exists $Lab2Num{$1}); $pnlist{$2}=""; push @{$Lab2Pag{$1}},$2; print AUXFILE "\\citation{$1}\n"; }elsif(/^\\aibibcite\{([^{]+)\}\{(.+)\}$/){ $workscount++ unless ($1 eq '*') or (exists $Lab2Pag{$1}) or (exists $Lab2Bib{$1}) or (exists $Lab2Num{$1}); $pnlist{$2}=""; push @{$Lab2Num{$1}},$2; print AUXFILE "\\citation{$1}\n" if $usenum; # using \aibibcite implies we want to ignore \bibcite. For this to # work, in the .aux files, the first \aibibcite must appear before any # \bibcite. $useaibibcite="yes"; }elsif(/^\\bibcite\{([^{]+)\}\{(.+)\}$/){ unless($useaibibcite){ $workscount++ unless ($1 eq '*') or (exists $Lab2Pag{$1}) or (exists $Lab2Bib{$1}) or (exists $Lab2Num{$1}); $pnlist{$2}=""; push @{$Lab2Num{$1}},$2; print AUXFILE "\\citation{$1}\n" if $usenum; } }elsif(/^\\bibpage\{([^{]+)\}\{(.+)\}$/){ $workscount++ unless (exists $Lab2Pag{$1}) or (exists $Lab2Bib{$1}) or (exists $Lab2Num{$1}); $pnlist{$2}=""; push @{$Lab2Bib{$1}},$2; print AUXFILE "\\citation{$1}\n"; }elsif(/^\\aiexplicit\{(.+)\}\{(.+)\}$/ and !$usenum){ # above: page number might not contain '}{' print BIBFILE "\@MISC{$tmp$explicits,author=\"$1\"}\n"; $pnlist{$2}=""; push @{$Lab2Pag{"$tmp$explicits"}},$2; print AUXFILE "\\citation{$tmp$explicits}\n"; $explicits++; }elsif(/^\\bibdata\{(.+)\}$/){ # keep the order of data base files, but remove duplicates; # BibTeX complains about them. my $bibdb; for $bibdb (split(",",$1)){ push @bib, $bibdb unless $biboccured{$bibdb}; $biboccured{$bibdb}=1; } }elsif(/^\\aistyle\{(.+)\}$/){ warn "Multiple \\authorindexstyle\n" if $bst && ($bst ne $1); $bst=$1; }elsif(/^\\aioptions\{(.*)\}$/){ ($editors,$nameformat,$maxnames,$truncnames,$labeltype)=split /\|/,$1; $usenum|=($labeltype eq "labels"); if($editors>0){ $altedit="{ peditor format }"; $addedit=$altedit if $editors==2; } }elsif(/^\\aifilename\{(.+)\}$/){ warn "Warning: Multiple authorindices\n" if $output; $output=$1; }elsif(/^\\\@input\{(.+)\}$/){ push(@ARGV,$1) unless exists $opt{'r'}; }elsif(/^\\pagetypeorder\{([rRaAn]+)\}$/){ $PageTypeOrder=$1; }elsif(/^\\aiseestring\{(.+)\}$/){ $see=$1; }elsif(/^\\aialsostrings\{(.+)\}\{(.+)\}$/){ $also=$1; $alsosep=$2; }elsif(/^\\aitwostring\{(.+)\}$/){ $twoabbrev=$1; }elsif(/^\\aiinbibflag$/){ $miniindex=1; }elsif(/^\\ainocompressflag$/){ $nocompress=1; } } close BIBFILE; # output can go to stdout or a filename found in the input files. $output || die "You have to include .aux file produced by .tex file containing \\begin{document}\nin the argument list and you have to \\usepackage{authorindex}!\n"; $output="-" if exists $opt{'p'}; # We need at least one BibTeX database push @bib, $tmp if($explicits); my $bibfiles=join(",",@bib) || die "You must specify at least one BibTeX database\n"; print AUXFILE "\\bibdata{$bibfiles}\n"; $see && $also && die "\\aisee and \\aialso are mutually exlusive!\n"; # if the user hasn't explicitly given a .bst style for formatting author names, # we generate our own based on the style options found in the input files. unless($bst){ my @nameformat=split /;/,$nameformat; my ($printkey,$namefmtcmd)=("cite\$ write\$ termline\n",""); for (@nameformat){ my ($namerep,$sortrep)=split /:/; $sortrep=$namerep unless $sortrep; $namefmtcmd.= "duplicate\$ names swap\$ \"$namerep\" format.name\$ " . "write\$ termline\n" . "duplicate\$ names swap\$ \"$sortrep\" format.name\$ " . "purify\$ \"u\" change.case\$ write\$ termline\n$printkey"; $printkey="termline\n"; } $ENV{$bstenv}="$cwdpath$pathsep" . (exists $ENV{$bstenv} ? $ENV{$bstenv} : ""); $bst=$tmp; open(BSTFILE,">$bst.bst") || die "Can't open $bst.bst\n"; print BSTFILE < % too many names in list? { pop\$ #$truncnames } % yes, truncate. 'skip\$ %' no, keep them all if\$ 'numnames := %'save number of names #0 % start index { duplicate\$ numnames < } % test for "while\$" { #1 + % next name duplicate\$ names swap\$ % get name list and index "{ll}" format.name\$ % format curr. name "others" = % et al part? 'skip\$ %' yes, do not output { $namefmtcmd } % no: format all if\$ } while\$ pop\$ % loop until index is 0 } if\$ } function{default.type}{pauthor format pauthor empty\$ $altedit $addedit if\$ } function{article}{default.type} function{book}{default.type} function{booklet}{default.type} function{inbook}{default.type} function{incollection}{default.type} function{inproceedings}{default.type} function{conference}{default.type} function{manual}{default.type} function{mastersthesis}{default.type} function{misc}{default.type} function{phdthesis}{default.type} function{proceedings}{default.type} function{techreport}{default.type} function{unpublished}{default.type} read iterate{call.type\$} END # ... and here comes perl again. close BSTFILE; } # Now we have decided on our .bst file and can finish the temporary .aux file # we prepared for BibTeX. print AUXFILE "\\bibstyle{$bst}\n"; close AUXFILE; # if we have written to the temporary database, make sure BibTeX can find it. $ENV{$bibenv}="$cwdpath$pathsep" . (exists $ENV{$bibenv} ? $ENV{$bibenv} : "") if $explicits; # We now give BibTeX all the citation labels. In return we get a file whose # lines in turn contain an author name and a label of a work of that author. # The format the author names are given are determined by the BibTeX style file # $bst.bst. print STDERR `bibtex $tmp`; die "BibTeX error. Aborting leaving all temporary files $tmp.*\n" if $?; # if things went well, we can delete all these temporary files made for BibTeX. # The generated .bst file is kept if the user wishes so (-k option). unlink "$bst.bst" if ($bst eq $tmp && !(exists $opt{'k'})); unlink "$tmp.aux","$tmp.bib"; # Decide wether pages or citation labels go to the index my %Lab2Ent=%Lab2Pag; if($usenum){ %Lab2Ent=%Lab2Num; %Lab2Bib=(); }else{ %Lab2Num=(); } # We have now labels associated with page numbers and labels associated with # author names (in the file generated by the BibTeX run). Now we can bring # together the previous two main steps and compute for each author the pages # where she is cited. In draft mode, we also remember for each author the # labels of her works and the pages where these works are cited. my ($firstauthor,$firstsortname,$Lab,$PrevLab)=("","","",""); open(BIBFILE,"$tmp.bbl") || die "Can't open $tmp.bbl\n"; my $author; while($author=&readtosep()){ map s/[\[\]]//g, $author; my $sortname=&readtosep(); my $LabOrEmpty=&readtosep(); if($LabOrEmpty){ $PrevLab=$Lab; $Lab=$LabOrEmpty; } my $printname=$author; if($Lab ne $PrevLab){ @{$LeadAutPag{$author}}{@{$Lab2Ent{$Lab}}}="" if exists $Lab2Ent{$Lab}; @{$LeadAutPag{$author}}{@{$Lab2Ent{'*'}}}="" if exists $Lab2Ent{'*'}; @{$LeadAutPag{$author}}{@{$Lab2Bib{$Lab}}}="" if exists $Lab2Bib{$Lab}; $firstauthor=$author; $firstsortname=$sortname; }else{ ${$Aut2First{$sortname}}{$firstsortname}=""; if($see){ $printname="{$author}$see\\aifirst{$firstauthor}"; $sortname="$sortname$see$firstsortname"; } } @{$Aut2Pag{$printname}}{@{$Lab2Ent{$Lab}}}="" if exists $Lab2Ent{$Lab}; @{$Aut2Pag{$printname}}{@{$Lab2Ent{'*'}}}="" if exists $Lab2Ent{'*'}; @{$Aut2Pag{$printname}}{@{$Lab2Bib{$Lab}}}="" if exists $Lab2Bib{$Lab}; @{$Aut2Bib{$printname}}{@{$Lab2Bib{$Lab}}}="" if exists $Lab2Bib{$Lab}; push @{$Aut2Lab{$printname}},$Lab if (exists $opt{'d'}) and $LabOrEmpty; $printname{$sortname}=$printname; $plainname{$sortname}=$author; } close BIBFILE; unlink "$tmp.blg","$tmp.bbl"; # Last not least, output the results, properly sorted if needed. open(AIFILE,">$output") || die "Can't create author index file $output\n"; # convert page type order into numerical values my ($i,$page); $PageTypeOrder{$i}=length($PageTypeOrder) while($i=chop $PageTypeOrder); # create table that relates page to page order info for $page (keys %pnlist){ $PageOrder{$page}=&parse_pagenumber($page); } if($also){ my $coauthorname; for $coauthorname (keys %Aut2First){ my @namelist; my $sortname; for $sortname (sort keys %{$Aut2First{$coauthorname}}){ push @namelist, $printname{$sortname}; } # we rely on the fact that " " is alphabetically first, so that # $justbehind will end up directly after $coauthorname my $justbehind=$coauthorname." "; my $pseudoname=$also.join($alsosep, @namelist); $printname{$justbehind}=$pseudoname; $plainname{$justbehind}=$pseudoname; } } if(exists $opt{'i'}){ # generate file for makeindex: leave the work for makeindex. my $name; for $name (keys %printname){ my $author=$printname{$name}; my $page; for $page (keys %{$Aut2Pag{$author}}){ print AIFILE "\\indexentry{$name\@$author}{$page}\n"; } } }else{ # sort result, throw away duplicate page numbers and generate LaTeX file. print AIFILE "\\begin{theauthorindex}\n"; my ($prevfirstchar,$prevplain,$name)=("","",""); for $name (sort keys %printname){ my $thisfirstchar=substr($name,0,1); if($thisfirstchar ne $prevfirstchar){ print AIFILE "\\indexspace\n" if $prevfirstchar; $prevfirstchar=$thisfirstchar; } my $author=$printname{$name}; my $plain=$plainname{$name}; my $rep=$author; if($Aut2Lab{$author}){ print AIFILE "% @{$Aut2Lab{$author}}\n" if(exists $opt{'d'}); $rep=($plain eq $prevplain) ? "\\airep$author" : "\\aitop$author" if $plain ne $author; $prevplain=$plain; $authorcount++; } print AIFILE "\\item[$rep]"; $Aut2Bib{$author}={} unless exists $Aut2Bib{$author}; $LeadAutPag{$author}={} unless exists $LeadAutPag{$author}; my %b2p=%{$Aut2Bib{$author}}; my %lp=%{$LeadAutPag{$author}}; my %pagerep; my $page; for $page (keys %{$Aut2Pag{$author}}){ my $prep=$page; $prep="\\aifirstpage{$prep}" if exists $lp{$page}; $prep="\\aibibpage{$prep}" if exists $b2p{$page}; $pagerep{$page}=$prep; } my $res=&compressed_pages($Aut2Pag{$author},\%pagerep); print AIFILE " \\aipages{$res}\n"; } print AIFILE "\\end{theauthorindex}\n"; if(exists $opt{'d'}){ # in draft mode, include some statistics print AIFILE "%\n% $citationcount citations "; print AIFILE "of $workscount distinct works\n"; print AIFILE "% $explicits times \\aimention\n" if $explicits; print AIFILE "% $authorcount different authors\n"; } } close AIFILE; # merge mini indices into the .bbl-Files if it was requested. if($miniindex){ map s/aux$/bbl/,@SAVEARGV; my $file; for $file (@SAVEARGV){ open(BBLINPUT,$file) || next; open(BBLHELP,">$tmp.bbl") || die "Can't create temp file $tmp.bbl\n"; my $currlabel=""; while(){ if(/\\bibitem(\[.*\])*\{(.*)\}|\\end\{thebibliography\}/){ if($currlabel){ my $pagelist=&pages_for_label($currlabel); print BBLHELP "\\bibindex{$pagelist}\n"; } $currlabel=$2; print BBLHELP "$_"; }elsif(/\\bibindex\{(.*)\}/){ if($currlabel){ my $pagelist=&pages_for_label($currlabel); print BBLHELP "$`\\bibindex{$pagelist}$'"; } $currlabel=""; }else{ print BBLHELP "$_" if "$_" ne "\n"; } } close BBLHELP; close BBLINPUT; rename "$tmp.bbl","$file" || die "Can't replace old $file\n"; } } # auxiliary functions # convert roman numeral string to integer sub romanvalue { local($_)=shift; tr/IVXLCDM/ivxlcdm/; my %romandigits = ("i", 1, "v", 5, "x", 10, "l", 50, "c", 100, "d", 500, "m", 1000); my ($i,$sum,$prev)=("",0,1); while($i=chop){ my $this=$romandigits{$i}; $sum=$sum+(($this<$prev) ? -$this : $this); $prev=$this; } return $sum; } # convert letter to numeric value sub alphavalue { local($_)=@_; tr/A-Z/a-z/; return ord($_)-ord("a"); } # split page number in components and replace each component by a number for # the page type and the page number as an integer. sub parse_pagenumber { local($_)=@_; my $res=""; while($_){ s/^[^\\A-Za-z0-9]*//; if(exists $PageTypeOrder{'n'} && s/(^\d+)//){ $res.="$PageTypeOrder{'n'}".sprintf "%0.6d",$1; }elsif(exists $PageTypeOrder{'R'} && s/^\\uppercase\s*\{([ivxlcdm]+)\}//){ $res.="$PageTypeOrder{'R'}".sprintf "%0.4d",&romanvalue($1); }elsif(exists $PageTypeOrder{'R'} && s/(^[IVXLCDM]+)//){ $res.="$PageTypeOrder{'R'}".sprintf "%0.4d",&romanvalue($1); }elsif(exists $PageTypeOrder{'A'} && s/(^[A-Z])//){ $res.="$PageTypeOrder{'A'}".sprintf "%0.2d",&alphavalue($1); }elsif(exists $PageTypeOrder{'r'} && s/(^[ivxlcdm]+)//){ $res.="$PageTypeOrder{'r'}".sprintf "%0.4d",&romanvalue($1); }elsif(exists $PageTypeOrder{'a'} && s/(^[a-z])//){ $res.="$PageTypeOrder{'a'}".sprintf "%0.2d",&alphavalue($1); }else{ s/^.//; } } return $res; } # test wether 2 pages are subsequent sub a_follows_b { my ($i,$j)=@PageOrder{@_}; $i++; return($i eq $j); } # make a sorted, maybe compressed, list of pages sub compressed_pages { my ($A,$B)=@_; my %pages=%{$A}; my %pagerep=%{$B}; my ($prevpage,$pendrep,$res,$pagepending,$page)=("","","","",""); for $page (sort { $PageOrder{$a} cmp $PageOrder{$b} } keys %pages){ # handle compression of page ranges. At the moment, we # also compress ranges that might be displayed in different faces my $pagerep=(exists $pagerep{$page}) ? $pagerep{$page} : $page; if($prevpage){ if(!$nocompress and &a_follows_b($prevpage,$page)){ $pendrep=$pagepending ? "--$pagerep" : ($twoabbrev ? "$twoabbrev" : ", $pagerep"); $pagepending=1; }else{ $res.=($pagepending ? "$pendrep" : "").", $pagerep"; $pagepending=0; } }else{ $res.="$pagerep"; } $prevpage=$page; } $res.="$pendrep" if $pagepending; return $res; } sub pages_for_label { my ($label)=@_; if(exists $Lab2Pag{$label}){ my (%pages,%empty); @pages{@{$Lab2Pag{$label}}}=""; return &compressed_pages(\%pages,\%empty); } return ""; } sub readtosep { my $sum=""; while(){ return $sum if(/^%$/); chop; s/%$//; $sum.=$_; } }