/*
 * file: utfcode.c
 *
 * (c) Peter Kleiweg 2000
 *
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2,
 * or (at your option) any later version.
 *
 * Purpose: print the UTF-7 and/or UTF-8 encoding of the code points
 * given as command line arguments.
 */

#define UTFcodeVERSION "1.0"

#ifdef __MSDOS__
#ifndef __COMPACT__
#error Memory model COMPACT required
#endif  /* __COMPACT__ */
#include <dir.h>        /* fnsplit, MAXFILE */
#endif  /* __MSDOS__ */

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
    obits = 0,          /* number of bits pending in `octal' (0, 2 or 4) */
    utf7 = 1,           /* non-zero: emit the UTF-7 form */
    utf8 = 1,           /* non-zero: emit the UTF-8 form */
    nr;                 /* number of code points stored in `lu' */

unsigned
    octal;              /* current 6-bit base64 value being assembled */

long unsigned
    *lu;                /* the code points parsed from the command line */

char
    *programname,
    *no_mem_buffer,     /* reserve block, released when memory runs out */
    out_of_memory [] = "Out of memory";

void utf_7 (void);
void utf_8 (void);
void outbyte (unsigned i);
void outoct (void);
void get_programname (char const *argv0);
void errit (char const *format, ...);
void syntax (void);
void *s_malloc (size_t size);
void *s_realloc (void *block, size_t size);
char *s_strdup (char const *s);

/*
 * Convert one command line argument to a code point.
 *
 * Accepts anything strtoul() accepts with base 0 (decimal, 0x... hex,
 * 0... octal) and additionally the notation U+XXXX / u+XXXX, which is
 * rewritten in place to 0xXXXX before conversion.
 *
 * Exits through errit() when the argument is empty, has trailing
 * characters that are not part of the number, or is out of range for
 * an unsigned long.
 */
static long unsigned parse_code (char *arg)
{
    char *end;
    char first = arg [0];
    int uplus = (first == 'U' || first == 'u') && arg [1] == '+';
    long unsigned value;

    if (uplus) {
        arg [0] = '0';
        arg [1] = 'x';
    }

    errno = 0;
    value = strtoul (arg, &end, 0);
    if (end == arg || *end != '\0' || errno == ERANGE) {
        /* restore the user's spelling for the error message */
        if (uplus) {
            arg [0] = first;
            arg [1] = '+';
        }
        errit ("Not a valid number: %s", arg);
    }
    return value;
}

/*
 * Entry point: handle the -7 / -8 options, parse the remaining
 * arguments as code points and print the requested encodings.
 */
int main (int argc, char *argv [])
{
    int i;

    /* reserve some memory that can be given back when malloc fails */
    no_mem_buffer = (char *) malloc (1024);

    get_programname (argv [0]);

    while (argc > 1) {
        if (! strcmp (argv [1], "-7")) {
            utf7 = 1;
            utf8 = 0;
        } else if (! strcmp (argv [1], "-8")) {
            utf7 = 0;
            utf8 = 1;
        } else
            break;
        argv++;
        argc--;
    }

    /* `<=' rather than `==': argc may be 0 in unusual environments */
    if (argc <= 1)
        syntax ();

    nr = argc - 1;
    lu = (long unsigned *) s_malloc ((size_t) nr * sizeof (long unsigned));

    for (i = 0; i < nr; i++)
        lu [i] = parse_code (argv [i + 1]);

    if (utf7)
        utf_7 ();
    if (utf8)
        utf_8 ();

    return 0;
}

/*
 * Print all code points as one UTF-7 shifted sequence: '+', the
 * base64 encoding of the 16-bit big-endian values, '-', newline.
 *
 * Only values below 0x10000 are supported; anything larger ends the
 * program through errit().
 * NOTE(review): RFC 2152 encodes larger code points as UTF-16
 * surrogate pairs; that is not implemented here.
 */
void utf_7 ()
{
    int i;

    fputc ('+', stdout);
    for (i = 0; i < nr; i++) {
        if (lu [i] < 0x10000) {
            outbyte ((unsigned) (lu [i] >> 8));
            outbyte ((unsigned) (lu [i] & 0xFF));
        } else
            errit ("Too large for UTF-7: 0x%lX", lu [i]);
    }
    /* flush the 2 or 4 bits that are still waiting in `octal' */
    if (obits)
        outoct ();
    fputs ("-\n", stdout);
}

/*
 * Feed one 8-bit value into the base64 encoder.
 *
 * `obits' tells how many bits of the previous byte are still pending
 * in `octal'; every complete 6-bit group is written with outoct().
 */
void outbyte (unsigned u)
{
    switch (obits) {
        case 0:
            octal = (u >> 2);
            outoct ();
            octal = ((u & 0x03) << 4);
            obits = 2;
            break;
        case 2:
            octal |= (u >> 4);
            outoct ();
            octal = ((u & 0x0F) << 2);
            obits = 4;
            break;
        case 4:
            octal |= (u >> 6);
            outoct ();
            octal = (u & 0x3F);
            outoct ();
            obits = 0;
            break;
        default:
            /* not reached: obits is only ever set to 0, 2 or 4 */
            break;
    }
}

/*
 * Write the base64 character for the 6-bit value in `octal'
 * (A-Z, a-z, 0-9, '+', '/') to stdout.
 */
void outoct ()
{
    char c;

    if (octal < 26)
        c = (char) (octal + 'A');
    else if (octal < 52)
        c = (char) (octal - 26 + 'a');
    else if (octal < 62)
        c = (char) (octal - 52 + '0');
    else if (octal == 62)
        c = '+';
    else
        c = '/';
    fputc (c, stdout);
}

/*
 * Print all code points as UTF-8, followed by a newline.
 *
 * The one- to six-byte forms are produced, so values up to 0x7FFFFFFF
 * are accepted; anything larger ends the program through errit().
 */
void utf_8 ()
{
    /*
     * One row per encoded length: values below `limit' use lead byte
     * marker `lead' followed by `trail' continuation bytes.
     */
    static const struct {
        long unsigned limit;
        int lead;
        int trail;
    } forms [] = {
        { 0x80UL,       0x00, 0 },
        { 0x800UL,      0xC0, 1 },
        { 0x10000UL,    0xE0, 2 },
        { 0x200000UL,   0xF0, 3 },
        { 0x4000000UL,  0xF8, 4 },
        { 0x80000000UL, 0xFC, 5 }
    };
    int const nforms = (int) (sizeof (forms) / sizeof (forms [0]));
    int i, f, k;

    for (i = 0; i < nr; i++) {
        long unsigned cp = lu [i];

        for (f = 0; f < nforms; f++)
            if (cp < forms [f].limit)
                break;
        if (f == nforms)
            errit ("Too large for UTF-8: 0x%lX", cp);

        /* lead byte carries the topmost bits, then 6 bits per byte */
        fputc ((int) (forms [f].lead | (cp >> (6 * forms [f].trail))), stdout);
        for (k = forms [f].trail - 1; k >= 0; k--)
            fputc ((int) (0x80 | ((cp >> (6 * k)) & 0x3F)), stdout);
    }
    fputc ('\n', stdout);
}

/*
 * Print a printf-style error message, prefixed with the program name,
 * to stderr and terminate with exit status 1.
 */
void errit (char const *format, ...)
{
    va_list list;

    /* programname is still NULL when get_programname itself fails */
    fprintf (stderr, "\nError %s: ", programname ? programname : "utfcode");

    va_start (list, format);
    vfprintf (stderr, format, list);
    va_end (list);

    fprintf (stderr, "\n\n");

    exit (1);
}

/*
 * Store the base name of the executable (argv[0] without directory
 * part) in `programname'.
 */
void get_programname (char const *argv0)
{
#ifdef __MSDOS__
    char name [MAXFILE];

    fnsplit (argv0, NULL, NULL, name, NULL);
    programname = s_strdup (name);
#else   /* unix */
    char const *p;

    if (! argv0)
        argv0 = "utfcode";
    p = strrchr (argv0, '/');
    programname = s_strdup (p ? p + 1 : argv0);
#endif
}

/*
 * malloc() that never returns NULL: on failure the reserve buffer is
 * released and the program ends through errit().
 */
void *s_malloc (size_t size)
{
    void *p;

    p = malloc (size);
    if (! p) {
        free (no_mem_buffer);
        no_mem_buffer = NULL;
        errit ("%s", out_of_memory);
    }
    return p;
}

/*
 * realloc() that never returns NULL: on failure the reserve buffer is
 * released and the program ends through errit().
 */
void *s_realloc (void *block, size_t size)
{
    void *p;

    p = realloc (block, size);
    if (! p) {
        free (no_mem_buffer);
        no_mem_buffer = NULL;
        errit ("%s", out_of_memory);
    }
    return p;
}

/*
 * Return a freshly allocated copy of `s'; a NULL argument yields an
 * empty string. Allocation failure ends the program (see s_malloc).
 */
char *s_strdup (char const *s)
{
    char *s1;

    if (s) {
        s1 = (char *) s_malloc (strlen (s) + 1);
        strcpy (s1, s);
    } else {
        s1 = (char *) s_malloc (1);
        s1 [0] = '\0';
    }
    return s1;
}

/*
 * Print the usage message to stderr and terminate with exit status 1.
 */
void syntax ()
{
    fprintf (
        stderr,
        "\n"
        "This is utfcode, version " UTFcodeVERSION "\n"
        "\n"
        "Usage: %s [-7|-8] [number...]\n"
        "\n"
        "  -7: utf-7 only\n"
        "  -8: utf-8 only\n"
        "\n",
        programname
    );
    exit (1);
}