/* file: utf7.l */

%{

/*
 * UTF-7 decoding filter.
 *
 * Text outside a shifted sequence is copied through unchanged (by the
 * scanner's default echo rule).  A '+' opens a shifted sequence of base64
 * digits; every 16 bits collected from those digits form one character
 * code, which outchar() writes as an iso-8859-1 / iso-8859-15 byte, a
 * short ASCII substitution, or a fallback marker.
 */

/*
 * NOTE(review): the header names below were restored from the identifiers
 * this file uses (fnsplit/MAXFILE, isatty, errno, va_list, exit, strdup,
 * strerror, ...).  Confirm against the original list; in particular the
 * MS-DOS branch may have named one more header.  <stdio.h> is expected to
 * come from the generated scanner's own prologue.
 */
#ifdef __MSDOS__
#  include <dir.h>
#  include <io.h>
#else
#  include <unistd.h>
#endif
#include <errno.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

/* Non-zero when "-c" was given: print U+code for unmappable characters. */
int
    printcode = 0;

/*
 * Decoder state for the current shifted sequence:
 *   outcode[0], outcode[1]  high and low byte of the character being built
 *                           (only the low 8 bits of each are used)
 *   instep                  position (0..3) inside a group of four digits
 *   outstep                 index of the byte in outcode[] being filled
 */
unsigned int
    outcode [2],
    instep,
    outstep;

/* Base name of the executable, used in messages. */
char
    *programname;

void
    get_programname (char const *argv0),
    syntax (void),
    errit (char const *format, ...),
    utf (void),
    nextout (void),
    outchar (long unsigned);

#ifdef __MSDOS__
#define strcasecmp(s1, s2) (stricmp(s1, s2))
#endif

#define YY_NO_UNPUT
#define YY_SKIP_YYWRAP

#ifdef yywrap
#  undef yywrap
#endif
/* Only one input file is ever scanned: report end of input. */
int yywrap()
{
    return 1;
}

%}

%Start _utf

%%

<INITIAL>"+-"		{
			    /* "+-" outside a shifted sequence is a literal plus sign */
			    fputc ('+', stdout);
			}

<INITIAL>"+"		{
			    /*
			     * Open a shifted sequence.  This rule and the one
			     * above are restricted to INITIAL: _utf is an
			     * inclusive start condition, so unrestricted they
			     * would also fire inside a sequence and swallow
			     * '+', which is a base64 digit (value 62) there.
			     */
			    instep = outstep = 0;
			    BEGIN _utf;
			}

<_utf>[A-Za-z0-9+/]	{
			    /* one base64 digit: six more bits */
			    utf ();
			}

<_utf>"-"		{
			    /* explicit terminator: absorbed, not printed */
			    BEGIN INITIAL;
			}

<_utf>.|\n		{
			    /* any other character ends the sequence and is printed */
			    fputc (yytext [0], stdout);
			    BEGIN INITIAL;
			}

%%

/*
 * Feed the base64 digit in yytext[0] into the decoder.
 *
 * Four digits carry 24 bits, i.e. three bytes; `instep' tracks the
 * position inside such a group.  Each time a byte is complete nextout()
 * is called, and any bits left over from the digit start the next byte.
 * Bits above the low eight are left in outcode[] and masked off later in
 * nextout().
 */
void utf ()
{
    unsigned
	i,
	c = 0;	/* stays 0 should the scanner ever pass a non-digit */

    i = yytext [0];
    if (i >= 'A' && i <= 'Z')
	c = i - 'A';
    else if (i >= 'a' && i <= 'z')
	c = i + 26 - 'a';
    else if (i >= '0' && i <= '9')
	c = i + 52 - '0';
    else if (i == '+')
	c = 62;
    else if (i == '/')
	c = 63;

    switch (instep) {
	case 0:
	    /* six bits: top of a new byte */
	    outcode [outstep] = (c << 2);
	    break;
	case 1:
	    /* two bits finish the byte, four start the next */
	    outcode [outstep] |= (c >> 4);
	    nextout ();
	    outcode [outstep] = (c << 4);
	    break;
	case 2:
	    /* four bits finish the byte, two start the next */
	    outcode [outstep] |= (c >> 2);
	    nextout ();
	    outcode [outstep] = (c << 6);
	    break;
	case 3:
	    /* six bits finish the byte */
	    outcode [outstep] |= c;
	    nextout ();
	    break;
    }
    if (++instep == 4)
	instep = 0;
}

/*
 * A byte in outcode[outstep] is complete.  After the first byte just
 * switch to the second; after the second, combine both (first byte is the
 * high half) into a 16-bit code, emit it, and start over.
 */
void nextout ()
{
    unsigned
	c;

    if (outstep == 0) {
	outstep = 1;
    } else {
	outstep = 0;
	c = ((outcode [0] & 0xFF) << 8) | (outcode [1] & 0xFF);
	outchar (c);
    }
}

/*
 * Write character code `c' to stdout:
 *   - codes below 256 are written as a single byte;
 *   - the codes listed in the first switch are written as the given
 *     iso-8859-15 byte;
 *   - the codes listed in the second switch are written as two letters;
 *   - anything else becomes "U+XXXX" when printcode is set, otherwise
 *     byte 191 (inverted question mark in iso-8859-1).
 */
void outchar (long unsigned c)
{
    int
	i;
    char const
	*s;

    /*
     * iso-8859-1
     */
    if (c < 256) {
	fputc (c, stdout);
	return;
    }

    /*
     * iso-8859-15
     */
    i = 0;
    switch (c) {
	case 0x20Ac: i = 0xA4; break;  /* euro */
	case 0x0160: i = 0xA6; break;  /* S caron */
	case 0x0161: i = 0xA8; break;  /* s caron */
	case 0x017D: i = 0xB4; break;  /* Z caron */
	case 0x017E: i = 0xB8; break;  /* z caron */
	case 0x0152: i = 0xBC; break;  /* OE ligature */
	case 0x0153: i = 0xBD; break;  /* oe ligature */
	case 0x0178: i = 0xBE; break;  /* Y diaeresis */
    }
    if (i) {
	fputc (i, stdout);
	return;
    }

    /*
     * substitutions
     */
    s = NULL;
    switch (c) {
	case 0x0132: s = "IJ"; break;
	case 0x0133: s = "ij"; break;
    }
    if (s) {
	fputs (s, stdout);
	return;
    }

    if (printcode) {
	if (c < 0x10000)
	    printf ("U+%04X", (unsigned) c);
	else
	    printf ("U+%08lX", c);
    } else
	fputc (191, stdout);
}

/*
 * Usage: program [-c] [file]
 *
 * Without a file argument stdin is decoded, unless stdin is a terminal,
 * in which case the usage text is shown.
 */
int main (int argc, char *argv [])
{
    get_programname (argv [0]);

    /* consume leading "-c" options; argv[1] is always the next argument */
    while (argc > 1)
	if (! strcmp (argv [1], "-c")) {
	    printcode = 1;
	    argv++;
	    argc--;
	} else
	    break;

    switch (argc) {
	case 1:
	    if (isatty (fileno (stdin)))
		syntax ();
	    yyin = stdin;
	    break;
	case 2:
	    yyin = fopen (argv [1], "r");
	    if (! yyin)
		errit ("Opening file \"%s\": %s", argv [1], strerror (errno));
	    break;
	default:
	    syntax ();
    }

    yylex ();

    if (yyin != stdin)
	fclose (yyin);

    return 0;
}

/*
 * Store a copy of the executable's base name in `programname'.
 */
void get_programname (char const *argv0)
{
#ifdef __MSDOS__
    char
	name [MAXFILE];
    fnsplit (argv0, NULL, NULL, name, NULL);
    programname = strdup (name);
#else	/* unix */
    char const
	*p;
    p = strrchr (argv0, '/');
    if (p)
	programname = strdup (p + 1);
    else
	programname = strdup (argv0);
#endif
}

/*
 * Print a printf-style error message, prefixed with the program name, to
 * stderr and terminate with exit status 1.  Does not return.
 */
void errit (char const *format, ...)
{
    va_list
	list;

    fprintf (stderr, "\nError %s: ", programname);

    va_start (list, format);
    vfprintf (stderr, format, list);
    va_end (list);	/* every va_start needs a matching va_end */

    fprintf (stderr, "\n\n");

    exit (1);
}

/*
 * Print the usage text to stderr and terminate with exit status 1.
 */
void syntax ()
{
    fprintf (
	stderr,
	"\n"
	"Syntax: %s [-c] [utf-7 encoded file]\n"
	"\n"
	"The file will be translated to iso-8859-1 *and* iso-8859-15\n"
	"\n"
	" -c : print U+code for characters not in iso-8859-1/15\n"
	"\n",
	programname
    );
    exit (1);
}