#!/usr/bin/env ruby
# encoding: utf-8

# match_parens - copies its input to standard output, prefixing every line
# with the line number and the bracket/quote characters that are still
# unmatched at that point. Exits 0 when nothing is left unmatched, 1 otherwise.

Myname = File.basename($0)
Version = '1.44'
Help= <<'DOC'
= match_parens - find mismatches of various brackets and quotes

= Synopsis
match_parens [filename]

== Options
-h              print this help and exit
-H,--help       print full documentation and exit
-V,--version    print version and exit
-l,--latex      convert ``...'' to “...” before testing
-n,--number=N   set number of mismatching characters shown to N (default: 10)
-p,--pairs=S    set matching pairs to S (default: {}[]()""“”''‘’)
--test          do an internal test and exit

= Description
Mismatches of parentheses, braces, (angle) brackets, especially in TeX
sources which may be rich in those, may be difficult to trace. This little
script helps you by writing your text to standard output, after adding a
left margin to your text, which will normally be almost empty, but will
clearly show up to 10 mismatches. (Just try me on myself to see that the
parenthesis starting this sentence will not appear to be matched at the end
of the file. If you look at me in the vim editor, then select this paragraph
and try the command: |:!%|.

By default, the following pairs are tested:
  ()  round brackets or parentheses
  {}  curly brackets or braces
  []  square brackets
  <>  angle brackets (within html text only)
  ""  ASCII double quotes
  “”  Unicode double quotation marks
  ''  ASCII single quotes
  ‘’  Unicode single quotation marks

The exit value of the script is 0 when there are no mismatches, 1 otherwise.

Angle brackets are only looked for inside HTML text, where HTML is supposed
to start with |<html>| or |=begin rdoc| and to end with |</html>| or |=end|.

= Options
-h,--help
   print short help information and exit.
-H,--Help
   print full documentation via less and exit.
-V,--version
   print this script's version and exit.
-l,--latex
   convert |``...''| to |“...”| before testing.
-n,--number=N
   Set number of mismatching characters shown to N. By default, only the
   first 10 are shown.
-p,--pairs=S
   Set matching pairs to S (default: |{}[]()""“”''‘’|).
   For example, if you want to look for mismatching ASCII single quotes
   /only/, use |--pairs="''"|. Or, if you want to match braces and
   guillemets only, use |-p «»|. Note that if html is detected in your
   text, |<>| is automatically added to the pairs list. So by default,
   |<...>| is tested only in html, but you can test that in other text by
   specifying the |<>| pair in the |--pairs| option.
--test
   do an internal test and exit. Note that if, with the |--pairs| option,
   you specify an other pairs list than the default, the test will probably
   fail, but you can still see the effects of your pairs list on the test
   data.

= Examples
Suppose we have two files, |good| and |bad|, containing these texts:

   good:
      This is a (simple) test
      without mismatches
   bad:
      This is a (simple test
      containing mismatches

then here are some usage examples. First a simple test on these files:

   $ matchparens good
      1 |            | This is a (simple) test
      2 |            | without mismatches
   $ echo $?
   0
   $ matchparens bad
      1 | (          | This is a (simple test
      2 | (          | containing mismatches
   $ echo $?
   1

Just report if there are mismatches:

   $ matchparens good >/dev/null && echo fine || echo problems
   fine
   $ matchparens bad >/dev/null && echo fine || echo problems
   problems

Report all tex files with mismatches in the current directory:

   $ for i in *.tex; do matchparens $i >/dev/null || echo $i; done

Matches must be in correct order:

   $ echo -e "This is a ([simple)] test\n" | match_parens
      1 ([)] This is a ([simple)] test
      2 ([)]

= Author
[Wybo Dekker](wybo@dekkerdocumenten.nl)

= Copyright
Released under the [GNU General Public License](www.gnu.org/copyleft/gpl.html)
DOC

require 'optparse'
require 'ostruct'

# Show the full documentation (Help) in the less pager, with the script name
# and version as the pager prompt.
#
# The text is written straight to the pager's standard input instead of being
# interpolated into a shell `echo "..."` command: Help contains backticks,
# double quotes and `$?`, all of which a shell would interpret.
def help
  prompt = "-P#{Myname}-#{Version.tr('.','·')}"
  IO.popen(['less', prompt], 'w') { |pager| pager.write(Help) }
rescue Errno::ENOENT
  # No less available: fall back to plain output.
  puts Help
rescue Errno::EPIPE
  # The pager was closed before it had read everything; nothing left to do.
  nil
end

# Option defaults and run-time state:
#   number      maximum number of unmatched characters shown in the margin
#   input       stream that is read (STDIN, the named file, or DATA for --test)
#   latex       convert ``...'' to “...” before testing
#   lineno      number of the line last printed
#   s           the characters that are still unmatched so far
#   html        true while reading inside html text
#   test        true when running the internal test
#   parenstext  the pairs to match, as a flat string of two-character pairs
$o = OpenStruct.new(
  :number     => 10,
  :input      => STDIN,
  :latex      => false,
  :lineno     => 0,
  :s          => '',
  :html       => false,
  :test       => false,
  :parenstext => %q{{}[]()""“”''‘’}
)

OptionParser.new(
  <<~EOD,
    #{Myname} - find mismatches of various brackets and quotes\n
    Usage: match_parens [filename]\n
    Options:
  EOD
  15, # summary width: 2x8-1
  ''  # summary indent
) do |opts|
  opts.on('-h',
    'print this help and exit'
  ) do
    # Hide the undocumented -I option from the summary.
    # NOTE(review): the gsub below removes every single space from the help
    # text; its pattern looks like it lost whitespace somewhere - confirm
    # against the upstream script.
    puts opts.to_s.sub(/^ *-I\n/,'').gsub(/ /,'')
    exit
  end
  opts.on('-H','--help',
    'print full documentation via less and exit'
  ) do
    help
    exit
  end
  opts.on('-V','--version',
    'print version and exit'
  ) do
    puts Version
    exit
  end
  # Double-quoted on purpose: inside a single-quoted literal the '' would end
  # the string and start a new one, silently dropping the two quotes.
  opts.on('-l','--latex',
    "convert ``...'' to “...” before testing"
  ) do
    $o.latex = true
  end
  opts.on('-n','--number=N',Integer,
    "set number of mismatching characters shown to N (default: #{$o.number})"
  ) do |v|
    $o.number = v
  end
  opts.on('-p','--pairs=S',String,
    "set matching pairs to S (default: #{$o.parenstext})"
  ) do |v|
    $o.parenstext = v
  end
  opts.on('--test',
    'do an internal test and exit'
  ) do
    $o.input = DATA
    $o.test = true
  end
  opts.on('-I') do
    # argv form: the script name is never passed through a shell
    system('instscript', Myname)
    exit
  end
  opts.parse!
end

# Pairs tested outside html text, and the same list extended with <> for use
# inside html text.
plain_pairs = $o.parenstext
html_pairs  = plain_pairs =~ /<>/ ? plain_pairs : plain_pairs + '<>'

# Regexp matching any adjacent complete pair such as (), {}, “”.
# Regexp.union escapes every regexp metacharacter in the pairs, and for an
# empty pairs list it yields a pattern that never matches, so the removal
# loop below always terminates.
pair_regexp = lambda { |pairs| Regexp.union(pairs.scan(/../)) }
plain_re = pair_regexp.call(plain_pairs)
html_re  = pair_regexp.call(html_pairs)

arg = ARGV[0] || ''
unless arg.empty?
  File.exist?(arg) or raise("file #{arg} does not exist")
  File.readable?(arg) or raise("file #{arg} is not readable")
  $o.input = File.new(arg)
end

while (line = $o.input.gets)
  # convert LaTeX's ``...'' to “...”
  line = line.gsub(/``/,'“').gsub(/''/,'”') if $o.latex

  # only inside html text we check <>, too:
  if $o.html && (line =~ /^([# ]*=end)/ || line =~ /<\/html>/i)
    $o.html = false
  elsif line =~ /^([# ]*=begin rdoc)/ || line =~ /<html>/i
    $o.html = true
  end
  pairs, re = $o.html ? [html_pairs, html_re] : [plain_pairs, plain_re]

  # append this line's pair characters to the unmatched characters; an
  # explicit filter avoids the special meaning that -, \ and ^ have in
  # String#tr character sets
  $o.s << line.each_char.select { |c| pairs.include?(c) }.join

  # remove matched pairs from the inside out until nothing changes
  while $o.s.gsub!(re,'') do end

  # [0, number] rather than 0..number-1, which for number 0 would be 0..-1,
  # i.e. the whole string
  puts "%4d | %-*s | %s" % [$o.lineno += 1, $o.number, $o.s[0, $o.number], line]
end

if $o.test
  if $o.s != '“{”}'
    raise("test failed! (final mismatch should be “{”})")
  else
    puts "test succeeded"
  end
else
  exit $o.s.empty? ? 0 : 1
end

__END__
This first line “{()}” has no mismatch
but here ( we have one. It is even extended “{()
with two more. Here comes an untested angle bracket <, but
here }” all) mismatches are erased.
=begin rdoc
Inside html text now. ‘{There is an unmatched} single quote here.
A brace { is added to it here, and an angle bracket < is added.
But > all mismatches }’ cancelled here.
=end
Four mismatches start ({"'start {here}
but are cancelled '" here}).
We end here with a mismatch caused by “{(improper)”} nesting!
But the test succeeds because we expected that.