PROGRAM Wsltex; { ********************************************************************** Copy an 8-bit file to a 7-bit file, turning characters with bit 8 set into 4-character octal escape sequences \nnn. This is useful in analyzing WordStar microcomputer word processor files. Many sequences which can be recognized as doing something useful are turned into LaTeX command sequences. Comments below describe the sequences which are recognized and translated. Usage: @WSLTEX Old 8-bit file: filespec New 7-bit file: filespec [11-Oct-85] ********************************************************************** } { ^L } CONST nul = 0; bel = 7; ht = 9; lf = 10; ff = 12; cr = 13; esc = 27; maxring = 256; { limit for character ring } maxspecial = 100; { limit for warning when special strings (underline, bold, etc) exceed this length } unset = MAXINT; VAR wordstar_file,latex_file : PACKED FILE OF integer; ring_index : integer; ring : PACKED ARRAY [0 .. maxring] OF integer; hyphen_break : boolean; eol,is_underline,is_subscript,is_superscript,is_bold, is_alternate : boolean; bi_eol,bi_underline,bi_subscript,bi_superscript,bi_bold, bi_alternate : integer; bo_eol,bo_underline,bo_subscript,bo_superscript,bo_bold, bo_alternate : integer; input_byte_offset,output_byte_offset : integer; empty_count,last_c,c : integer; { ^L } PROCEDURE Warning(s : PACKED ARRAY [k1 .. k2 : integer] OF char; b1,b2 : integer { output byte range, or unset,unset } ); VAR k : integer; BEGIN Writeln(tty); { !!!! temporary for debugging !!!! } Writeln(tty); { !!!! temporary for debugging !!!! } Writeln(tty,'----------------------------------------------------------'); Write(tty,'%'); FOR k := k1 TO k2 DO Write(tty,s[k]); Writeln(tty); if b1 <> unset then Writeln(tty,' output byte range = ',b1:0,' .. ',b2:0); Writeln(tty,' input byte offset = ',input_byte_offset); Writeln(tty,' output byte offset = ',output_byte_offset); Writeln(tty,' current context = ['); FOR k := ring_index TO maxring DO Write(tty,Chr(ring[k])); FOR k := 0 TO ((maxring + ring_index - 1) MOD maxring) DO Write(tty,Chr(ring[k])); Writeln(tty); Writeln(tty,' ]'); Writeln(tty,'----------------------------------------------------------'); Writeln(tty) END; { Warning } { ^L } PROCEDURE W_byte(c : integer); BEGIN ring[ring_index] := c; ring_index := (ring_index + 1) MOD maxring; latex_file^ := c; eol := (c = lf); Put(latex_file); output_byte_offset := output_byte_offset + 1; IF eol THEN BEGIN bi_eol := input_byte_offset; bo_eol := output_byte_offset END END; { W_byte } PROCEDURE W_char(c : char); BEGIN W_byte(Ord(c)) END; { W_char } { ^L } PROCEDURE W_octal; BEGIN W_byte(Ord('\')); W_byte((wordstar_file^ DIV 64) + Ord('0')); W_byte(((wordstar_file^ DIV 8) MOD 8) + Ord('0')); W_byte((wordstar_file^ MOD 8) + Ord('0')) END; { W_octal } { ^L } PROCEDURE W_string(s : PACKED ARRAY [k1 .. k2 : integer] OF char); VAR k : integer; BEGIN FOR k := k1 TO k2 DO W_byte(Ord(s[k])) END; { W_string } { ^L } PROCEDURE Begin_alternate; BEGIN IF is_alternate THEN Warning( 'BEGIN ALTERNATE CHARACTER SET command encounted inside alternate sequence', bo_alternate,output_byte_offset); bi_alternate := input_byte_offset; bo_alternate := output_byte_offset; is_alternate := true; W_string('{\alt ') END; { Begin_alternate } { ^L } PROCEDURE Begin_boldface; BEGIN IF is_bold THEN Warning('BEGIN BOLDFACE command encountered inside boldface', bo_bold,output_byte_offset); bi_bold := input_byte_offset; bo_bold := output_byte_offset; is_bold := true; W_string('{\bf ') END; { Begin_boldface } { ^L } PROCEDURE End_alternate; BEGIN IF is_alternate THEN BEGIN IF (bo_alternate + maxspecial) < output_byte_offset THEN Warning('Long ALTERNATE CHARACTER SET sequence', bo_alternate,output_byte_offset) END ELSE Warning( 'END ALTERNATE CHARACTER SET command encountered outside alternate text', bo_alternate,output_byte_offset); is_alternate := false; W_byte(Ord('}')) END; { End_alternate } { ^L } PROCEDURE End_boldface; BEGIN IF is_bold THEN BEGIN IF (bo_bold + maxspecial) < output_byte_offset THEN Warning('Long BOLDFACE sequence',bo_bold,output_byte_offset) END ELSE Warning('END BOLDFACE command encountered outside boldface', bo_bold,output_byte_offset); is_bold := false; W_byte(Ord('}')) END; { End_boldface } { ^L } PROCEDURE Line_feed; BEGIN { line_feed } IF hyphen_break THEN { discard line break } hyphen_break := false ELSE BEGIN IF bo_eol < output_byte_offset THEN { text on line } W_byte(lf) ELSE { empty line } BEGIN IF last_c <> (128 + cr) THEN W_byte(lf) END END END; { Line_feed } { ^L } PROCEDURE Superscript; BEGIN IF is_superscript THEN { ending old superscript } BEGIN IF (bo_superscript + maxspecial) < output_byte_offset THEN Warning('Long SUPERSCRIPT sequence', bo_superscript,output_byte_offset); W_byte(Ord('}')) END ELSE { beginning new superscript } BEGIN bi_superscript := input_byte_offset; bo_superscript := output_byte_offset; W_string('^{') END; is_superscript := NOT is_superscript END; { Superscipt } { ^L } PROCEDURE Subscript; BEGIN IF is_subscript THEN { ending old superscript } BEGIN IF (bo_subscript + maxspecial) < output_byte_offset THEN Warning('Long SUBSCRIPT sequence', bo_subscript,output_byte_offset); W_byte(Ord('}')) END ELSE { beginning new superscript } BEGIN bi_subscript := input_byte_offset; bo_subscript := output_byte_offset; W_string('_{'); END; is_subscript := NOT is_subscript END; { Subscript } { ^L } PROCEDURE Underline; BEGIN IF is_underline THEN { ending old underline } BEGIN IF (bo_underline + maxspecial) < output_byte_offset THEN Warning('Long UNDERLINE sequence', bo_underline,output_byte_offset); W_byte(Ord('}')) END ELSE { beginning new underline } BEGIN bi_underline := input_byte_offset; bo_underline := output_byte_offset; W_string('{\em ') END; { underlined text is \emphasized in LaTeX } is_underline := NOT is_underline END; { Underline } { ^L } BEGIN { Main -- WSLTEX } Rewrite(output,'tty:'); Write('Input WordStar 8-bit file: '); Reset(wordstar_file,'':@,'/e/b:8'); Write('Output LaTeX 7-bit file: '); Rewrite(latex_file,'':@,'/e/b:7'); { Initializations -- in alphabetical order } bi_alternate := -1; bi_bold := -1; bi_subscript := -1; bi_superscript := -1; bi_underline := -1; bo_alternate := -1; bo_bold := -1; bo_subscript := -1; bo_superscript := -1; bo_underline := -1; eol := true; hyphen_break := false; input_byte_offset := -1; is_bold := false; is_subscript := false; is_superscript := false; is_underline := false; last_c := -1; output_byte_offset := -1; for ring_index := 0 to maxring do ring[ring_index] := ord(' '); ring_index := 0; { ======================================================================== WordStar makes heavy use of the 8-th (high-order) bit of 8-bit ASCII characters. In general, it turns on that bit for -- any character (0..127) at end-of-word -- any character (0..127) at end-of-line -- "hard" line break on CTL-M -- "hard" page break on CTL-L A discretionary hyphen (which can be removed on joining the lines) at end-of-line is encoded as "-\215" (i.e. "-<\200|CTL-M>"). A required hyphen falling at end-of-line is encoded as "-\040\215" (i.e. followed by a space). Comment lines and WordStar command lines begin with a dot. "Hard" line breaks are encoded as \215\015 (i.e. "<\200|CTL-M>"); these occur between double-space lines. "Soft" line breaks are encoded as ; these occur at paragraph ends. Centered text is stored centered without any special markings. Underlined text is bracketed by CTL-S. Boldface text is bracketed by CTL-E ..text.. CTL-R. Superscripts are bracketed by CTL-T. Subscripts are bracketed by CTL-V. Alternate typewheel strings appear as CTL-Q ..text.. CTL-W. Some of these at least are mnemonic (e = epsilon, m = mu, . = centered dot). CTL-R also appears after all sub/superscript sequences, apparently functioning as a "return to normal" state. ======================================================================== } c := wordstar_file^; WHILE NOT Eof(wordstar_file) DO BEGIN IF c < 32 THEN { ordinary control character } BEGIN CASE c OF 0,1,2,3,4 : { CTL-@ .. CTL-D } W_octal; 5 : { CTL-E } Begin_boldface; 6,7,8 : { CTL-F .. CTL-H } W_octal; 9 : { CTL-I } W_byte(ht); 10 : { CTL-J } Line_feed; 11 : { CTL-K } W_octal; 12 : { CTL-L } W_byte(ff); 13 : { CTL-M } IF last_c = Ord('-') THEN hyphen_break := true ELSE IF NOT hyphen_break THEN W_byte(cr); 14,15,16 : { CTL-N .. CTL-P } W_octal; 17 : { CTL-Q } Begin_alternate; 18 : { CTL-R } IF is_bold THEN End_boldface; { ELSE discard character } 19 : { CTL-S } Underline; 20 : { CTL-T } Superscript; 21 : { CTL-U } W_octal; 22 : { CTL-V } Subscript; 23 : { CTL-W } End_alternate; 24,25,26,27,28,29,30,31 : { CTL-X .. CTL-_ } W_octal END { case } END ELSE IF chr(c) IN ['#','$','%','&','~','_','^','\','{','}'] THEN { LaTeX special characters must be quoted by backslash } BEGIN W_char('\'); W_byte(c) END ELSE IF c = Ord('.') THEN { check for WordStar comment } BEGIN IF eol THEN W_string('%-WS-% '); W_byte(c) END ELSE IF c < 128 THEN { c in 32 .. 127 } W_byte(c) ELSE { c > 127 } BEGIN c := c - 128; { lop off 8-th bit } CASE c OF 0 : { CTL-@ } W_octal; 1 : { CTL-A } W_octal; 2 : { CTL-B } W_octal; 3 : { CTL-C } W_octal; 4 : { CTL-D } W_octal; 5 : { CTL-E } Begin_boldface; 6 : { CTL-F } W_octal; 7 : { CTL-G } W_octal; 8 : { CTL-H } W_octal; 9 : { CTL-I } W_byte(ht); 10 : { CTL-J } Line_feed; 11 : { CTL-K } W_octal; 12 : { CTL-L } W_byte(ff); 13 : { CTL-M } IF last_c = Ord('-') THEN hyphen_break := true ELSE IF NOT hyphen_break THEN BEGIN IF bo_eol < output_byte_offset THEN { not double space} W_byte(cr); END; 14 : { CTL-N } W_octal; 15 : { CTL-O } W_octal; 16 : { CTL-P } W_octal; 17 : { CTL-Q } Begin_alternate; 18 : { CTL-R } IF is_bold THEN End_boldface; { ELSE discard character } 19 : { CTL-S } Underline; 20 : { CTL-T } Superscript; 21 : { CTL-U } W_octal; 22 : { CTL-V } Subscript; 23 : { CTL-W } End_alternate; 24 : { CTL-X } W_octal; 25 : { CTL-Y } W_octal; 26 : { CTL-Z } W_octal; 27 : { CTL-[ } W_octal; 28 : { CTL-\ } W_octal; 29 : { CTL-] } W_octal; 30 : { CTL-^ } W_octal; 31 : { CTL-_ } W_octal; 32,33,34,35,36,37,38,39,40,41,42,43,44,45 : W_byte(c); 46 : { period } BEGIN IF eol THEN W_string('%-WS-% '); W_byte(c) END; 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 : W_byte(c); 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80, 81,82,83,84,85,86,87,88,89,90,91,92,93,94,95 : W_byte(c); 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110, 111,112,113,114,115,116,117,118,119,120,121,122,123,124, 125,126 : W_byte(c); 127 : W_octal; OTHERS : W_octal; END { CASE } END; last_c := wordstar_file^; input_byte_offset := input_byte_offset + 1; Get(wordstar_file); c := wordstar_file^ END; END. { Main -- WSLTEX }