?login_element?

Subversion Repositories NedoOS

Rev

Rev 1229 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /* charconv.c
  2.  
  3.    Copyright (C) 2010 Alexander Korolkov <alexander.korolkov@gmail.com>
  4.    Copyright (C) 2018-2020 Pali Rohár <pali.rohar@gmail.com>
  5.  
  6.    This program is free software: you can redistribute it and/or modify
  7.    it under the terms of the GNU General Public License as published by
  8.    the Free Software Foundation, either version 3 of the License, or
  9.    (at your option) any later version.
  10.  
  11.    This program is distributed in the hope that it will be useful,
  12.    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14.    GNU General Public License for more details.
  15.  
  16.    You should have received a copy of the GNU General Public License
  17.    along with this program. If not, see <http://www.gnu.org/licenses/>.
  18.  
  19.    The complete text of the GNU General Public License
  20.    can be found in /usr/share/common-licenses/GPL-3 file.
  21. */
  22.  
#include "charconv.h"
//#include <langinfo.h>
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#ifdef HAVE_ICONV
#include <iconv.h>
#include <langinfo.h>   /* nl_langinfo(), CODESET: needed only by the iconv code paths */
#endif
  35.  
  36. /* CP850 table for 0x80-0xFF range from:
  37.  * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT
  38.  */
  39. static const wchar_t cp850_table[128] = {
  40.     0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
  41.     0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
  42.     0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
  43.     0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192,
  44.     0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
  45.     0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
  46.     0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0,
  47.     0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510,
  48.     0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
  49.     0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
  50.     0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce,
  51.     0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
  52.     0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe,
  53.     0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4,
  54.     0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
  55.     0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0,
  56. };
  57.  
  58. /* CP850 translit table to 7bit ASCII for 0x80-0xFF range */
  59. static const char *const cp850_translit_table[128] = {
  60.     "C",   "u",   "e",  "a",     "a",     "a", "a",   "c",
  61.     "e",   "e",   "e",  "i",     "i",     "i", "A",   "A",
  62.     "E",   "ae",  "AE", "o",     "o",     "o", "u",   "u",
  63.     "y",   "O",   "U",  "o",     "GBP",   "O", "x",   "f",
  64.     "a",   "i",   "o",  "u",     "n",     "N", "a",   "o",
  65.     "?",   "(R)", "!",  " 1/2 ", " 1/4 ", "!", "<<",  ">>",
  66.     "?",   "?",   "?",  "|",     "+",     "A", "A",   "A",
  67.     "(C)", "?",   "?",  "?",     "?",     "c", "JPY", "+",
  68.     "+",   "+",   "+",  "+",     "-",     "+", "a",   "A",
  69.     "?",   "?",   "?",  "?",     "?",     "?", "?",   "?",
  70.     "d",   "D",   "E",  "E",     "E",     "i", "I",   "I",
  71.     "I",   "+",   "+",  "?",     "?",     "|", "I",   "?",
  72.     "O",   "ss",  "O",  "O",     "o",     "O", "u",   "th",
  73.     "TH",  "U",   "U",  "U",     "y",     "Y", "?",   "'",
  74.     "-",   "+-",  "?",  " 3/4 ", "?",     "?", "/",   ",",
  75.     "?",   "?",   ".",  "1",     "3",     "2", "?",   " ",
  76. };
  77.  
  78. static int wchar_string_to_cp850_string(char *out, const wchar_t *in, unsigned int out_size)
  79. {
  80.     unsigned i, j;
  81.     for (i = 0; i < out_size-1 && in[i]; ++i) {
  82.         if (in[i] > 0 && in[i] < 0x80) {
  83.             out[i] = in[i];
  84.             continue;
  85.         }
  86.         for (j = 0; j < 0x80; ++j) {
  87.             if (in[i] == cp850_table[j]) {
  88.                 out[i] = (0x80 | j);
  89.                 break;
  90.             }
  91.         }
  92.         if (j == 0x80) {
  93.             fprintf(stderr, "Cannot convert input character 0x%04x to 'CP850': %s\n", (unsigned int)in[i], strerror(EILSEQ));
  94.             return 0;
  95.         }
  96.     }
  97.     if (in[i]) {
  98.         fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
  99.         return 0;
  100.     }
  101.     out[i] = 0;
  102.     return 1;
  103. }
  104.  
  105. static int cp850_string_to_wchar_string(wchar_t *out, const char *in, unsigned int out_size)
  106. {
  107.     unsigned i;
  108.     for (i = 0; i < out_size-1 && i < 11 && in[i]; ++i) {
  109.         out[i] = (in[i] & 0x80) ? cp850_table[in[i] & 0x7F] : in[i];
  110.     }
  111.     if (i < 11 && in[i]) {
  112.         fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
  113.         return 0;
  114.     }
  115.     out[i] = L'\0';
  116.     return 1;
  117. }
  118.  
  119. static int cp850_char_to_printable(char **p, unsigned char c, unsigned int out_size)
  120. {
  121.     size_t ret;
  122.     wchar_t wcs[2];
  123.     wcs[0] = (c & 0x80) ? cp850_table[c & 0x7F] : c;
  124.     wcs[1] = 0;
  125.     ret = wcstombs(*p, wcs, out_size);
  126.     if (ret == 0)
  127.         return 0;
  128.     if (ret != (size_t)-1)
  129.         *p += ret;
  130.     else if (!(c & 0x80))
  131.         *(*p++) = c;
  132.     else {
  133.         ret = strlen(cp850_translit_table[c & 0x7F]);
  134.         if (ret > out_size)
  135.             return 0;
  136.         memcpy(*p, cp850_translit_table[c & 0x7F], ret);
  137.         *p += ret;
  138.     }
  139.     return 1;
  140. }
  141.  
  142. static int local_string_to_cp850_string(char *out, const char *in, unsigned int out_size)
  143. {
  144.     int ret;
  145.     wchar_t *wcs;
  146.     if (strlen(in) >= out_size) {
  147.         fprintf(stderr, "Cannot convert input string '%s' to 'CP850': String is too long\n", in);
  148.         return 0;
  149.     }
  150.     wcs = calloc(out_size, sizeof(wchar_t));
  151.     if (!wcs) {
  152.         fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(ENOMEM));
  153.         return 0;
  154.     }
  155.     if (mbstowcs(wcs, in, out_size) == (size_t)-1) {
  156.         fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(errno));
  157.         free(wcs);
  158.         return 0;
  159.     }
  160.     ret = wchar_string_to_cp850_string(out, wcs, out_size);
  161.     free(wcs);
  162.     return ret;
  163. }
  164.  
  165. #ifdef HAVE_ICONV
  166.  
  167. static int iconv_init_codepage(int codepage, const char *local, iconv_t *to_local, iconv_t *from_local)
  168. {
  169.     char codepage_name[32];
  170.     snprintf(codepage_name, sizeof(codepage_name), "CP%d//TRANSLIT", codepage);
  171.     *to_local = iconv_open(local, codepage_name);
  172.     if (*to_local == (iconv_t) - 1) {
  173.         snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
  174.         *to_local = iconv_open(local, codepage_name);
  175.     }
  176.     if (*to_local == (iconv_t) - 1)
  177.         fprintf(stderr, "Cannot initialize conversion from codepage %d to %s: %s\n", codepage, local, strerror(errno));
  178.     snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
  179.     *from_local = iconv_open(codepage_name, local);
  180.     if (*from_local == (iconv_t) - 1)
  181.         fprintf(stderr, "Cannot initialize conversion from %s to codepage %d: %s\n", local, codepage, strerror(errno));
  182.     return (*to_local != (iconv_t)-1 && *from_local != (iconv_t)-1) ? 1 : 0;
  183. }
  184.  
  185. static iconv_t dos_to_local;
  186. static iconv_t local_to_dos;
  187. static iconv_t dos_to_wchar;
  188. static iconv_t wchar_to_dos;
  189. static int used_codepage;
  190. static int internal_cp850;
  191.  
  192. /*
  193.  * Initialize conversion from codepage.
  194.  * codepage = -1 means default codepage.
  195.  * Returns non-zero on success, 0 on failure
  196.  */
  197. static int init_conversion(int codepage)
  198. {
  199.     static int initialized = -1;
  200.     if (initialized < 0) {
  201.         initialized = 1;
  202.         if (codepage < 0)
  203.             codepage = DEFAULT_DOS_CODEPAGE;
  204.         setlocale(LC_CTYPE, "");        /* initialize locale for CODESET */
  205.         if (!iconv_init_codepage(codepage, nl_langinfo(CODESET), &dos_to_local, &local_to_dos))
  206.             initialized = 0;
  207.         if (initialized && !iconv_init_codepage(codepage, "WCHAR_T", &dos_to_wchar, &wchar_to_dos))
  208.             initialized = 0;
  209.         if (!initialized && codepage == 850) {
  210.             fprintf(stderr, "Using internal CP850 conversion table\n");
  211.             internal_cp850 = 1; /* use internal CP850 conversion table */
  212.             initialized = 1;
  213.         }
  214.         if (initialized)
  215.             used_codepage = codepage;
  216.     }
  217.     return initialized;
  218. }
  219.  
  220. int set_dos_codepage(int codepage)
  221. {
  222.     return init_conversion(codepage);
  223. }
  224.  
  225. int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
  226. {
  227.     char in[1] = { c };
  228.     ICONV_CONST char *pin = in;
  229.     size_t bytes_in = 1;
  230.     size_t bytes_out = out_size;
  231.     if (!init_conversion(-1))
  232.         return 0;
  233.     if (internal_cp850)
  234.         return cp850_char_to_printable(p, c, out_size);
  235.     return iconv(dos_to_local, &pin, &bytes_in, p, &bytes_out) != (size_t)-1;
  236. }
  237.  
  238. int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
  239. {
  240.     ICONV_CONST char *pin = in;
  241.     char *pout = out;
  242.     size_t bytes_in = strlen(in);
  243.     size_t bytes_out = out_size-1;
  244.     size_t ret;
  245.     if (!init_conversion(-1))
  246.         return 0;
  247.     if (internal_cp850)
  248.         return local_string_to_cp850_string(out, in, out_size);
  249.     ret = iconv(local_to_dos, &pin, &bytes_in, &pout, &bytes_out);
  250.     if (ret == (size_t)-1) {
  251.         if (errno == E2BIG)
  252.             fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
  253.                     in, used_codepage);
  254.         else
  255.             fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from codeset '%s' to 'CP%d': %s\n",
  256.                     *pin, nl_langinfo(CODESET), used_codepage, strerror(errno));
  257.         iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
  258.         return 0;
  259.     } else {
  260.         ret = iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
  261.         if (ret == (size_t)-1) {
  262.             fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
  263.                     in, used_codepage);
  264.             return 0;
  265.         }
  266.     }
  267.     out[out_size-1-bytes_out] = 0;
  268.     return 1;
  269. }
  270.  
  271. int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
  272. {
  273.     ICONV_CONST char *pin = in;
  274.     char *pout = (char *)out;
  275.     size_t bytes_in = strnlen(in, 11);
  276.     size_t bytes_out = out_size-sizeof(wchar_t);
  277.     size_t ret;
  278.     if (!init_conversion(-1))
  279.         return 0;
  280.     if (internal_cp850)
  281.         return cp850_string_to_wchar_string(out, in, out_size);
  282.     ret = iconv(dos_to_wchar, &pin, &bytes_in, &pout, &bytes_out);
  283.     if (ret == (size_t)-1) {
  284.         if (errno == E2BIG)
  285.             fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
  286.                     used_codepage);
  287.         else
  288.             fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from 'CP%d': %s\n",
  289.                     *pin, used_codepage, strerror(errno));
  290.         iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
  291.         return 0;
  292.     } else {
  293.         ret = iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
  294.         if (ret == (size_t)-1) {
  295.             fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
  296.                     used_codepage);
  297.             return 0;
  298.         }
  299.     }
  300.     out[(out_size-sizeof(wchar_t)-bytes_out)/sizeof(wchar_t)] = L'\0';
  301.     return 1;
  302. }
  303.  
  304. int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
  305. {
  306.     ICONV_CONST char *pin = (char *)in;
  307.     char *pout = out;
  308.     size_t bytes_in = wcslen(in)*sizeof(wchar_t);
  309.     size_t bytes_out = out_size-1;
  310.     size_t ret;
  311.     if (!init_conversion(-1))
  312.         return 0;
  313.     if (internal_cp850)
  314.         return wchar_string_to_cp850_string(out, in, out_size);
  315.     ret = iconv(wchar_to_dos, &pin, &bytes_in, &pout, &bytes_out);
  316.     if (ret == (size_t)-1) {
  317.         if (errno == E2BIG)
  318.             fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
  319.                     in, used_codepage);
  320.         else
  321.             fprintf(stderr, "Cannot convert input character '%lc' to 'CP%d': %s\n",
  322.                     (wint_t)*(wchar_t *)pin, used_codepage, strerror(errno));
  323.         iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
  324.         return 0;
  325.     } else {
  326.         ret = iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
  327.         if (ret == (size_t)-1) {
  328.             fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
  329.                     in, used_codepage);
  330.             return 0;
  331.         }
  332.     }
  333.     out[out_size-1-bytes_out] = 0;
  334.     return 1;
  335. }
  336.  
  337. #else
  338.  
  339. int set_dos_codepage(int codepage)
  340. {
  341.     static int initialized = -1;
  342.     if (initialized < 0) {
  343.         setlocale(LC_CTYPE, ""); /* initialize locale for wide character functions */
  344.         if (codepage < 0)
  345.             codepage = DEFAULT_DOS_CODEPAGE;
  346.         initialized = (codepage == 850) ? 1 : 0;
  347.         if (!initialized)
  348.             fprintf(stderr, "Cannot initialize unsupported codepage %d, only codepage 850 is supported\n", codepage);
  349.     }
  350.     return initialized;
  351. }
  352.  
  353. int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
  354. {
  355.     return cp850_char_to_printable(p, c, out_size);
  356. }
  357.  
  358. int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
  359. {
  360.     return local_string_to_cp850_string(out, in, out_size);
  361. }
  362.  
  363. int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
  364. {
  365.     return cp850_string_to_wchar_string(out, in, out_size);
  366. }
  367.  
  368. int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
  369. {
  370.     return wchar_string_to_cp850_string(out, in, out_size);
  371. }
  372.  
  373. #endif
  374.