#! /usr/bin/gawk -f

# CVS version control block - do not edit manually
#  $RCSfile: bibfile-reformat-pages,v $
#  $Revision: 1.4 $
#  $Date: 2004/09/27 13:54:47 $
#  $Source: /home/cvs/papers/sty/bibfile-reformat-pages,v $

# bibfile-reformat-pages - put file.bib page ranges into uniform style

# Copyright (C) 2004 Barak A. Pearlmutter

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.  You can find a copy of this license
# at http://www.fsf.org/copyleft/gpl.html, or in the file COPYING, or
# on Debian systems in /usr/share/common-licenses/GPL, or by writing
# to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.

## usage() - print the help text to standard output.
## Blank lines are emitted with an explicit empty string so the output
## does not depend on the current contents of $0.
function usage() {
    print "Usage: bibfile-reformat-pages [abbrev=1] [separator=X] [files.bib or stdin]";
    print "";
    print "Summary: reformat the \"pages=\" bits of a .bib file.";
    print "";
    print "Version: '$RCSfile: bibfile-reformat-pages,v $ $Revision: 1.4 $";
    print "";
    print "Description:";
    print "";
    print " Passes through input file or stdin, which is assumed to be a bibtex";
    print " .bib file, changing page ranges to a uniform format. The converted";
    print " file is sent to standard output. Vacuous ranges are always compressed,";
    print " so 1234-1234 becomes just 1234. Blanks around in the range separator";
    print " are always squeezed out, so \"12 - 34 \" becomes \"12-34\". Ranges are";
    print " by default put into expanded style, so 1234-56 becomes 1234-1256, but";
    print " if the abbrev=1 option is given the conversion is reversed and 1234-1256";
    print " is instead abbreviated to 1234-56. The range separator \"-\" or \"--\"";
    print " is preserved, unless a separator=- or separator=-- option is given in";
    print " which case they are homogenised.";
    print "";
    print " Efforts, albeit not exhaustive ones, are taken to only modify \"pages=\"";
    print " lines and to err on the side of caution (first, do no harm).";
    print "";
    print "Options:";
    print "";
    print " -h Print this help message and exit";
    print " abbrev=1 Abbreviate page ranges instead of unabbreviating them";
    print " separator=X Replaces page range separators with X, which must be - or --.";
    print "";
    print "Examples:";
    print "";
    print " bibfile-reformat-pages abbrev=0 separator=-- foo.bib > foo-new.bib";
    print " diff foo.bib foo-new.bib";
    print " mv foo-new.bib foo.bib";
    print "";
    print " bibfile-reformat-pages foo.bib > foo-new.bib";
    print "";
    print " bibfile-reformat-pages < foo.bib > foo-new.bib";
    print "";
    print " bibfile-reformat-pages abbrev=1 separator=- foo.bib > foo-new.bib";
    print "";
    print "Useful commands for testing:";
    print "";
    print " bibfile-reformat-pages foo.bib | diff foo.bib - | more";
    print " for f in *.bib; do echo $f; bibfile-reformat-pages <$f | diff $f -; done";
    print " find . -name '*.bib' | xargs bibfile-reformat-pages | egrep pages | more";
}

## Start-up: answer a help request, otherwise set the defaults.
BEGIN {
    if (ARGV[1] == "-h" || ARGV[1] ~ /^--?help$/) {
        usage();
        exit;
    }
    IGNORECASE = 1;     # regex matching below ignores case
    abbrev = 0;         # avoid uninitialised variable warning
    separator = 0;      # avoid uninitialised variable warning
}

## Would code as "if" in BEGIN, but var=x opts processed after BEGIN.
## Hence the check is a rule: it fires on the first input record read
## while an invalid separator is in effect, and aborts with status 1.
(separator && !(separator ~ /^--?$/)) {
    print "error: illegal option separator=" separator > "/dev/stderr";
    print " valid values: \"-\" or \"--\"." > "/dev/stderr";
    close("/dev/stderr");
    exit 1;
}

## high_page_unabbrev(lo, hi) - expand an abbreviated upper page number.
##   lo, hi : digit strings for the lower and upper page of a range.
## If hi has fewer digits than lo, the missing leading digits are copied
## from lo (lo="1234", hi="56" gives "1256"); otherwise hi is returned
## unchanged.  Names after the wide gap are locals, not arguments.
function high_page_unabbrev(lo, hi,    llo, lhi) {
    llo = length(lo);
    lhi = length(hi);
    if (llo > lhi)
        hi = substr(lo, 1, llo - lhi) hi;
    return hi;
}

## numsame(x, y) - length of the common leading prefix of strings x and y.
## Returns 0 when the first characters differ or either string is empty.
## Names after the wide gap are locals, not arguments.
function numsame(x, y,    i, xc, yc) {
    i = 1;
    while (1) {
        xc = substr(x, i, 1);
        yc = substr(y, i, 1);
        # substr() yields "" once i runs past the end of a string.
        if (xc == "" || yc == "" || xc != yc)
            return i - 1;
        i += 1;
    }
}

## high_page_abbrev(lo, hi) - abbreviate the upper page number.
##   lo, hi : digit strings for the lower and upper page of a range.
## hi is first put into expanded form; then, if both numbers have the same
## number of digits, the leading digits hi shares with lo are dropped
## (lo="1234", hi="1256" gives "56").  If lo and hi are identical the
## result is the empty string; the only caller in this file handles that
## case before calling.  Names after the wide gap are locals.
function high_page_abbrev(lo, hi,    k) {
    hi = high_page_unabbrev(lo, hi);
    if (length(lo) == length(hi)) {
        k = numsame(lo, hi);
        if (k > 0)
            hi = substr(hi, 1 + k);
    }
    return hi;
}

## Rewrite a "pages = ... LOW - HIGH ..." line.  The three-argument form of
## match() (a gawk extension) stores the parenthesised groups in a[]:
##   a[1] text up to the first digit    a[2] low page    a[3] separator
##   a[4] high page                     a[5] trailing non-digit text
match($0, /^([[:blank:]]*pages[[:blank:]]*=[^[:digit:]]*)([[:digit:]]+)[[:blank:]]*(--?)[[:blank:]]*([[:digit:]]+)([^[:digit:]]*)$/, a) {
    low = a[2];
    sep = a[3];
    high = high_page_unabbrev(low, a[4]);   # put into canonical (unabbrev) form
    if (low == high) {
        print a[1] low a[5];                # 123-123 becomes just 123
    } else {
        if (abbrev)
            high = high_page_abbrev(low, high);
        if (separator)
            sep = separator;
        print a[1] low sep high a[5];
    }
    next;
}

## Every other line is passed through unchanged.
{
    print;
}