/* Copyright 2010-2023 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #ifndef _WIN32 #include #else /* _WIN32 */ /* Workaround for problems caused in mingw.org's MinGW build by Gnulib's wchar.h overriding the wint_t type definition, which causes compilation errors when perl.h is included below, because perl.h includes ctype.h. */ #include #endif #include #include /* See "How do I use all this in extensions" in 'man perlguts'. */ #define PERL_NO_GET_CONTEXT #include "EXTERN.h" #include "perl.h" #if defined _WIN32 && !defined __CYGWIN__ # undef free #endif #include "XSUB.h" #include "ppport.h" #include "miscxs.h" const char *whitespace_chars = " \t\f\v\r\n"; char * xs_process_text (char *text) { static char *new; char *p, *q; dTHX; new = realloc (new, strlen (text) + 1); strcpy (new, text); p = q = new; while (*p) { if (*p == '-' && p[1] == '-') { if (p[2] == '-') { *q = '-'; q[1] = '-'; p += 3; q += 2; } else { *q = '-'; p += 2; q += 1; } } else if (*p == '\'' && p[1] == '\'') { *q = '"'; p += 2; q += 1; } else if (*p == '`') { if (p[1] == '`') { *q = '"'; p += 2; q += 1; } else { *q = '\''; p += 1; q += 1; } } else { *q++ = *p++; } } *q = '\0'; return new; } char * xs_unicode_text (char *text, int in_code) { char *p, *q; static char *new; int new_space, new_len; dTHX; /* Perl boilerplate. */ if (in_code) return text; p = text; new_space = strlen (text); new = realloc (new, new_space + 1); new_len = 0; #define ADD3(s) \ if (new_len + 2 >= new_space - 1) \ { \ new_space += 2; \ new = realloc (new, new_space *= 2); \ } \ new[new_len++] = s[0]; \ new[new_len++] = s[1]; \ new[new_len++] = s[2]; #define ADD1(s) \ if (new_len >= new_space - 1) \ new = realloc (new, (new_space *= 2) + 1); \ new[new_len++] = s; #define ADDN(s, n) \ if (new_len + n - 1 >= new_space - 1) \ { \ new_space += n; \ new = realloc (new, (new_space *= 2) + 1); \ } \ memcpy(new + new_len, s, n); \ new_len += n; while (1) { q = p + strcspn (p, "-`'"); ADDN(p, q - p); if (!*q) break; switch (*q) { case '-': if (!memcmp (q, "---", 3)) { p = q + 3; /* Unicode em dash U+2014 (0xE2 0x80 0x94) */ ADD3("\xE2\x80\x94"); } else if (!memcmp (q, "--", 2)) { p = q + 2; /* Unicode en dash U+2013 (0xE2 0x80 0x93) */ ADD3("\xE2\x80\x93"); } else { p = q + 1; ADD1(*q); } break; case '`': if (!memcmp (q, "``", 2)) { p = q + 2; /* U+201C E2 80 9C */ ADD3("\xE2\x80\x9C"); } else { p = q + 1; /* U+2018 E2 80 98 */ ADD3("\xE2\x80\x98"); } break; case '\'': if (!memcmp (q, "''", 2)) { p = q + 2; /* U+201D E2 80 9D */ ADD3("\xE2\x80\x9D"); } else { p = q + 1; /* U+2019 E2 80 99 */ ADD3("\xE2\x80\x99"); } break; } } new[new_len] = '\0'; return new; } char * xs_entity_text (char *text) { char *p, *q; static char *new; int new_space, new_len; dTHX; /* Perl boilerplate. */ p = text; new_space = strlen (text); new = realloc (new, new_space + 1); new_len = 0; #define ADDN(s, n) \ if (new_len + n - 1 >= new_space - 1) \ { \ new_space += n; \ new = realloc (new, (new_space *= 2) + 1); \ } \ memcpy(new + new_len, s, n); \ new_len += n; while (1) { q = p + strcspn (p, "-`'"); ADDN(p, q - p); if (!*q) break; switch (*q) { case '-': if (!memcmp (q, "---", 3)) { p = q + 3; ADDN("—", 7); } else if (!memcmp (q, "--", 2)) { p = q + 2; ADDN("–", 7); } else { p = q + 1; ADD1(*q); } break; case '`': if (!memcmp (q, "``", 2)) { p = q + 2; ADDN("“", 7); } else { p = q + 1; ADDN("‘", 7); } break; case '\'': if (!memcmp (q, "''", 2)) { p = q + 2; ADDN("”", 7); } else { p = q + 1; ADDN("’", 7); } break; } } new[new_len] = '\0'; return new; } void xs_parse_command_name (SV *text_in, char **command, int *is_single_letter) { char *text; dTHX; /* Make sure the input is in UTF8. */ if (!SvUTF8 (text_in)) sv_utf8_upgrade (text_in); text = SvPV_nolen (text_in); *command = 0; *is_single_letter = 0; if (isalnum(text[0])) { char *p, *q; static char *s; p = text; q = text + 1; while (isalnum (*q) || *q == '-' || *q == '_') q++; s = realloc (s, q - p + 1); memcpy (s, p, q - p); s[q - p] = '\0'; *command = s; } else if (text[0] && strchr ("([\"'~@&}{,.!?" " \t\n" "*-^`=:|/\\", text[0])) { static char a[2]; *command = a; a[0] = text[0]; a[1] = '\0'; *is_single_letter = 1; } return; } /* Return list ($at_command, $open_brace, ....) */ void xs_parse_texi_regex (SV *text_in, char **arobase, char **open_brace, char **close_brace, char **comma, char **asterisk, char **form_feed, char **menu_only_separator, char **new_text) { char *text; dTHX; /* Make sure the input is in UTF8. */ if (!SvUTF8 (text_in)) sv_utf8_upgrade (text_in); text = SvPV_nolen (text_in); *arobase = *open_brace = *close_brace = *comma = *asterisk = *form_feed = *menu_only_separator = *new_text = 0; if (*text == '@') { *arobase = "@"; } else if (*text == '{') { *open_brace = "{"; } else if (*text == '}') { *close_brace = "}"; } else if (*text == ',') { *comma = ","; } else if (strchr (":\t.", *text)) { static char a[2]; *menu_only_separator = a; a[0] = *text; a[1] = '\0'; } else if (*text == '\f') { *form_feed = "\f"; } else { char *p; if (*text == '*') *asterisk = "*"; p = text; p += strcspn (p, "{}@,:\t.\n\f"); if (p > text) { static char *s; s = realloc (s, p - text + 1); memcpy (s, text, p - text); s[p - text] = '\0'; *new_text = s; } } return; } char * xs_default_format_protect_text (char *text) { char *p, *q; static char *new; int new_space, new_len; dTHX; /* Perl boilerplate. */ p = text; new_space = strlen (text); new = realloc (new, new_space + 1); new_len = 0; #define ADDN(s, n) \ if (new_len + n - 1 >= new_space - 1) \ { \ new_space += n; \ new = realloc (new, (new_space *= 2) + 1); \ } \ memcpy(new + new_len, s, n); \ new_len += n; while (1) { q = p + strcspn (p, "<>&\"\f"); ADDN(p, q - p); if (!*q) break; switch (*q) { case '<': ADDN("<", 4); break; case '>': ADDN(">", 4); break; case '&': ADDN("&", 5); break; case '"': ADDN(""", 6); break; case '\f': ADDN(" ", 5); break; } p = q + 1; } new[new_len] = '\0'; return new; }