Login

Subversion Repositories NedoOS

Rev

Rev 1229 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

/* charconv.c

   Copyright (C) 2010 Alexander Korolkov <alexander.korolkov@gmail.com>
   Copyright (C) 2018-2020 Pali Rohár <pali.rohar@gmail.com>

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.

   The complete text of the GNU General Public License
   can be found in /usr/share/common-licenses/GPL-3 file.
*/


#include "charconv.h"
//#include <langinfo.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <wchar.h>

#ifdef HAVE_ICONV
#include <iconv.h>
#include <langinfo.h>   /* nl_langinfo(CODESET) is only used by the iconv code path */
#endif

/* CP850 table for 0x80-0xFF range from:
 * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT
 */

/*
 * Entry j is the wide-character value for CP850 byte (0x80 | j); the
 * conversion helpers in this file index it with (byte & 0x7F) and search it
 * linearly for the reverse direction.  Each row covers eight byte values.
 */
static const wchar_t cp850_table[128] = {
    0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, /* 0x80-0x87 */
    0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, /* 0x88-0x8f */
    0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, /* 0x90-0x97 */
    0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192, /* 0x98-0x9f */
    0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, /* 0xa0-0xa7 */
    0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, /* 0xa8-0xaf */
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0, /* 0xb0-0xb7 */
    0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510, /* 0xb8-0xbf */
    0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3, /* 0xc0-0xc7 */
    0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4, /* 0xc8-0xcf */
    0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce, /* 0xd0-0xd7 */
    0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, /* 0xd8-0xdf */
    0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe, /* 0xe0-0xe7 */
    0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4, /* 0xe8-0xef */
    0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8, /* 0xf0-0xf7 */
    0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0, /* 0xf8-0xff */
};

/* CP850 translit table to 7bit ASCII for 0x80-0xFF range */
/*
 * Entry j is the ASCII replacement string for CP850 byte (0x80 | j).
 * cp850_char_to_printable() falls back to it when the byte cannot be
 * expressed in the current locale.  Replacements may be longer than one
 * character.  Each row covers eight byte values.
 */
static const char *const cp850_translit_table[128] = {
    "C",   "u",   "e",  "a",     "a",     "a", "a",   "c",   /* 0x80-0x87 */
    "e",   "e",   "e",  "i",     "i",     "i", "A",   "A",   /* 0x88-0x8f */
    "E",   "ae",  "AE", "o",     "o",     "o", "u",   "u",   /* 0x90-0x97 */
    "y",   "O",   "U",  "o",     "GBP",   "O", "x",   "f",   /* 0x98-0x9f */
    "a",   "i",   "o",  "u",     "n",     "N", "a",   "o",   /* 0xa0-0xa7 */
    "?",   "(R)", "!",  " 1/2 ", " 1/4 ", "!", "<<",  ">>",  /* 0xa8-0xaf */
    "?",   "?",   "?",  "|",     "+",     "A", "A",   "A",   /* 0xb0-0xb7 */
    "(C)", "?",   "?",  "?",     "?",     "c", "JPY", "+",   /* 0xb8-0xbf */
    "+",   "+",   "+",  "+",     "-",     "+", "a",   "A",   /* 0xc0-0xc7 */
    "?",   "?",   "?",  "?",     "?",     "?", "?",   "?",   /* 0xc8-0xcf */
    "d",   "D",   "E",  "E",     "E",     "i", "I",   "I",   /* 0xd0-0xd7 */
    "I",   "+",   "+",  "?",     "?",     "|", "I",   "?",   /* 0xd8-0xdf */
    "O",   "ss",  "O",  "O",     "o",     "O", "u",   "th",  /* 0xe0-0xe7 */
    "TH",  "U",   "U",  "U",     "y",     "Y", "?",   "'",   /* 0xe8-0xef */
    "-",   "+-",  "?",  " 3/4 ", "?",     "?", "/",   ",",   /* 0xf0-0xf7 */
    "?",   "?",   ".",  "1",     "3",     "2", "?",   " ",   /* 0xf8-0xff */
};

/*
 * Convert the NUL-terminated wide string `in` to a NUL-terminated CP850
 * string in `out`, using the built-in table.
 *
 * out_size is the capacity of `out` in bytes, including the terminator.
 * Returns 1 on success.  Returns 0 (after printing a message to stderr) when
 * a character has no CP850 equivalent or when the result does not fit.
 */
static int wchar_string_to_cp850_string(char *out, const wchar_t *in, unsigned int out_size)
{
    unsigned i, j;

    /* out_size is unsigned: "out_size-1" below would wrap around for an
     * empty buffer and turn the loop into an unbounded write. */
    if (out_size == 0) {
        fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
        return 0;
    }

    for (i = 0; i < out_size-1 && in[i]; ++i) {
        /* 7-bit characters map to themselves */
        if (in[i] > 0 && in[i] < 0x80) {
            out[i] = in[i];
            continue;
        }
        /* reverse lookup: table index j corresponds to byte 0x80|j */
        for (j = 0; j < 0x80; ++j) {
            if (in[i] == cp850_table[j]) {
                out[i] = (0x80 | j);
                break;
            }
        }
        if (j == 0x80) {
            fprintf(stderr, "Cannot convert input character 0x%04x to 'CP850': %s\n", (unsigned int)in[i], strerror(EILSEQ));
            return 0;
        }
    }

    /* loop stopped because the buffer filled up, not because input ended */
    if (in[i]) {
        fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
        return 0;
    }
    out[i] = 0;
    return 1;
}

/*
 * Convert the CP850 string `in` to a NUL-terminated wide string in `out`,
 * using the built-in table.
 *
 * At most 11 input bytes are consumed; `in` does not have to be
 * NUL-terminated if it is at least that long.
 * NOTE(review): 11 presumably is the length of a FAT name/label field -
 * confirm against the callers.
 *
 * out_size is used here as the capacity of `out` in wchar_t elements,
 * including the terminator.
 * Returns 1 on success, 0 (after printing a message to stderr) when the
 * result does not fit.
 */
static int cp850_string_to_wchar_string(wchar_t *out, const char *in, unsigned int out_size)
{
    unsigned i;

    /* out_size is unsigned: "out_size-1" below would wrap around for an
     * empty buffer and allow writes past the end of `out`. */
    if (out_size == 0) {
        fprintf(stderr, "Cannot convert input string from 'CP850': String is too long\n");
        return 0;
    }

    for (i = 0; i < out_size-1 && i < 11 && in[i]; ++i) {
        /* high-bit bytes go through the table, 7-bit bytes map to themselves */
        out[i] = (in[i] & 0x80) ? cp850_table[in[i] & 0x7F] : in[i];
    }

    /* stopped early because `out` is full while input bytes remain */
    if (i < 11 && in[i]) {
        fprintf(stderr, "Cannot convert input string from 'CP850': String is too long\n");
        return 0;
    }
    out[i] = L'\0';
    return 1;
}

/*
 * Append a printable representation of the CP850 byte `c` at *p and advance
 * *p past the bytes written.  No terminating NUL is guaranteed.
 *
 * The byte is first converted to the current locale's multibyte encoding.
 * If the locale cannot represent it, a 7-bit byte is copied verbatim and a
 * high-bit byte is replaced by its ASCII transliteration.
 *
 * out_size is the number of bytes available at *p.
 * Returns 1 on success, 0 when nothing could be written.
 */
static int cp850_char_to_printable(char **p, unsigned char c, unsigned int out_size)
{
    size_t ret;
    wchar_t wcs[2];

    wcs[0] = (c & 0x80) ? cp850_table[c & 0x7F] : c;
    wcs[1] = 0;
    ret = wcstombs(*p, wcs, out_size);
    if (ret == 0)
        return 0;
    if (ret != (size_t)-1) {
        *p += ret;
    } else if (!(c & 0x80)) {
        if (out_size < 1)
            return 0;
        /* Advance the caller's cursor (*p).  The previous "*(*p++) = c"
         * parsed as *(*(p++)) and only bumped the local pointer-to-pointer,
         * so the caller would overwrite this byte on the next call. */
        *(*p)++ = c;
    } else {
        const char *translit = cp850_translit_table[c & 0x7F];

        ret = strlen(translit);
        if (ret > out_size)
            return 0;
        memcpy(*p, translit, ret);
        *p += ret;
    }
    return 1;
}

/*
 * Convert a string in the current locale's encoding to CP850 by going
 * through a temporary wide-character buffer.
 *
 * out_size is the capacity of `out` in bytes, including the terminator.
 * Returns the result of wchar_string_to_cp850_string() on the widened
 * string, or 0 (after printing a message to stderr) if the input is too
 * long, memory is exhausted or the input is not valid in the locale.
 */
static int local_string_to_cp850_string(char *out, const char *in, unsigned int out_size)
{
    wchar_t *wide;
    int ok = 0;

    if (strlen(in) >= out_size) {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': String is too long\n", in);
        return 0;
    }

    wide = calloc(out_size, sizeof *wide);
    if (wide == NULL) {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(ENOMEM));
        return 0;
    }

    if (mbstowcs(wide, in, out_size) != (size_t)-1)
        ok = wchar_string_to_cp850_string(out, wide, out_size);
    else
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(errno));

    free(wide);
    return ok;
}

#ifdef HAVE_ICONV

/*
 * Open the pair of iconv descriptors between DOS codepage `codepage` and
 * the encoding named `local`.
 *
 * *to_local   converts codepage -> local; "//TRANSLIT" is tried first and
 *             the plain codepage name is the fallback.
 * *from_local converts local -> codepage.
 *
 * Both opens are always attempted and each failure is reported on stderr.
 * Returns 1 only if both descriptors were opened, otherwise 0.
 */
static int iconv_init_codepage(int codepage, const char *local, iconv_t *to_local, iconv_t *from_local)
{
    const iconv_t failed = (iconv_t)-1;
    char name[32];
    int ok = 1;

    snprintf(name, sizeof(name), "CP%d//TRANSLIT", codepage);
    *to_local = iconv_open(local, name);

    /* from here on only the plain codepage name is needed */
    snprintf(name, sizeof(name), "CP%d", codepage);
    if (*to_local == failed)
        *to_local = iconv_open(local, name);
    if (*to_local == failed) {
        fprintf(stderr, "Cannot initialize conversion from codepage %d to %s: %s\n", codepage, local, strerror(errno));
        ok = 0;
    }

    *from_local = iconv_open(name, local);
    if (*from_local == failed) {
        fprintf(stderr, "Cannot initialize conversion from %s to codepage %d: %s\n", local, codepage, strerror(errno));
        ok = 0;
    }

    return ok;
}

/* iconv descriptors opened by init_conversion() */
static iconv_t dos_to_local;    /* DOS codepage -> locale encoding */
static iconv_t local_to_dos;    /* locale encoding -> DOS codepage */
static iconv_t dos_to_wchar;    /* DOS codepage -> "WCHAR_T" */
static iconv_t wchar_to_dos;    /* "WCHAR_T" -> DOS codepage */
static int used_codepage;       /* codepage number selected by init_conversion(), used in messages */
static int internal_cp850;      /* non-zero: iconv setup failed, use the built-in CP850 tables */

/*
 * Initialize conversion from codepage.
 * codepage = -1 means default codepage.
 * Returns non-zero on success, 0 on failure
 */

static int init_conversion(int codepage)
{
    static int initialized = -1;
    if (initialized < 0) {
        initialized = 1;
        if (codepage < 0)
            codepage = DEFAULT_DOS_CODEPAGE;
        setlocale(LC_CTYPE, "");        /* initialize locale for CODESET */
        if (!iconv_init_codepage(codepage, nl_langinfo(CODESET), &dos_to_local, &local_to_dos))
            initialized = 0;
        if (initialized && !iconv_init_codepage(codepage, "WCHAR_T", &dos_to_wchar, &wchar_to_dos))
            initialized = 0;
        if (!initialized && codepage == 850) {
            fprintf(stderr, "Using internal CP850 conversion table\n");
            internal_cp850 = 1; /* use internal CP850 conversion table */
            initialized = 1;
        }
        if (initialized)
            used_codepage = codepage;
    }
    return initialized;
}

/*
 * Select the DOS codepage used by the conversion functions.
 * Only the first initialization takes effect; the stored result is
 * returned afterwards.  Returns non-zero on success, 0 on failure.
 */
int set_dos_codepage(int codepage)
{
    const int ready = init_conversion(codepage);

    return ready;
}

int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
{
    char in[1] = { c };
    ICONV_CONST char *pin = in;
    size_t bytes_in = 1;
    size_t bytes_out = out_size;
    if (!init_conversion(-1))
        return 0;
    if (internal_cp850)
        return cp850_char_to_printable(p, c, out_size);
    return iconv(dos_to_local, &pin, &bytes_in, p, &bytes_out) != (size_t)-1;
}

int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
{
    ICONV_CONST char *pin = in;
    char *pout = out;
    size_t bytes_in = strlen(in);
    size_t bytes_out = out_size-1;
    size_t ret;
    if (!init_conversion(-1))
        return 0;
    if (internal_cp850)
        return local_string_to_cp850_string(out, in, out_size);
    ret = iconv(local_to_dos, &pin, &bytes_in, &pout, &bytes_out);
    if (ret == (size_t)-1) {
        if (errno == E2BIG)
            fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
                    in, used_codepage);
        else
            fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from codeset '%s' to 'CP%d': %s\n",
                    *pin, nl_langinfo(CODESET), used_codepage, strerror(errno));
        iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
        return 0;
    } else {
        ret = iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
        if (ret == (size_t)-1) {
            fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
                    in, used_codepage);
            return 0;
        }
    }
    out[out_size-1-bytes_out] = 0;
    return 1;
}

int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
{
    ICONV_CONST char *pin = in;
    char *pout = (char *)out;
    size_t bytes_in = strnlen(in, 11);
    size_t bytes_out = out_size-sizeof(wchar_t);
    size_t ret;
    if (!init_conversion(-1))
        return 0;
    if (internal_cp850)
        return cp850_string_to_wchar_string(out, in, out_size);
    ret = iconv(dos_to_wchar, &pin, &bytes_in, &pout, &bytes_out);
    if (ret == (size_t)-1) {
        if (errno == E2BIG)
            fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
                    used_codepage);
        else
            fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from 'CP%d': %s\n",
                    *pin, used_codepage, strerror(errno));
        iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
        return 0;
    } else {
        ret = iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
        if (ret == (size_t)-1) {
            fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
                    used_codepage);
            return 0;
        }
    }
    out[(out_size-sizeof(wchar_t)-bytes_out)/sizeof(wchar_t)] = L'\0';
    return 1;
}

int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
{
    ICONV_CONST char *pin = (char *)in;
    char *pout = out;
    size_t bytes_in = wcslen(in)*sizeof(wchar_t);
    size_t bytes_out = out_size-1;
    size_t ret;
    if (!init_conversion(-1))
        return 0;
    if (internal_cp850)
        return wchar_string_to_cp850_string(out, in, out_size);
    ret = iconv(wchar_to_dos, &pin, &bytes_in, &pout, &bytes_out);
    if (ret == (size_t)-1) {
        if (errno == E2BIG)
            fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
                    in, used_codepage);
        else
            fprintf(stderr, "Cannot convert input character '%lc' to 'CP%d': %s\n",
                    (wint_t)*(wchar_t *)pin, used_codepage, strerror(errno));
        iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
        return 0;
    } else {
        ret = iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
        if (ret == (size_t)-1) {
            fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
                    in, used_codepage);
            return 0;
        }
    }
    out[out_size-1-bytes_out] = 0;
    return 1;
}

#else

/*
 * Select the DOS codepage (build without iconv).
 * codepage < 0 selects DEFAULT_DOS_CODEPAGE.  Only codepage 850 is accepted.
 * The decision is made on the first call and returned unchanged afterwards.
 * Returns 1 on success, 0 on failure.
 */
int set_dos_codepage(int codepage)
{
    static int initialized = -1;

    if (initialized >= 0)
        return initialized;

    setlocale(LC_CTYPE, ""); /* initialize locale for wide character functions */
    if (codepage < 0)
        codepage = DEFAULT_DOS_CODEPAGE;

    if (codepage == 850) {
        initialized = 1;
    } else {
        initialized = 0;
        fprintf(stderr, "Cannot initialize unsupported codepage %d, only codepage 850 is supported\n", codepage);
    }
    return initialized;
}

/* Build without iconv: always use the built-in CP850 conversion. */
int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
{
    const int ok = cp850_char_to_printable(p, c, out_size);

    return ok;
}

/* Build without iconv: always use the built-in CP850 conversion. */
int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
{
    const int ok = local_string_to_cp850_string(out, in, out_size);

    return ok;
}

/* Build without iconv: always use the built-in CP850 conversion. */
int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
{
    const int ok = cp850_string_to_wchar_string(out, in, out_size);

    return ok;
}

/* Build without iconv: always use the built-in CP850 conversion. */
int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
{
    const int ok = wchar_string_to_cp850_string(out, in, out_size);

    return ok;
}

#endif