/* winduni.c -- unicode support for the windres program.
- Copyright 1997, 1998, 2000, 2001, 2003, 2007
- Free Software Foundation, Inc.
+ Copyright (C) 1997-2019 Free Software Foundation, Inc.
Written by Ian Lance Taylor, Cygnus Support.
Rewritten by Kai Tietz, Onevision.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
+ the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
02110-1301, USA. */
+
/* This file contains unicode support routines for the windres
program. Ideally, we would have generic unicode support which
would work on all systems. However, we don't. Instead, on a
#include "winduni.h"
#include "safe-ctype.h"
-#if HAVE_ICONV_H
+#if HAVE_ICONV
#include <iconv.h>
#endif
static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
-
-/* Prototypes. */
static int unichar_isascii (const unichar *, rc_uint_type);
+/* Convert an ASCII string to a unicode string. We just copy it,
+ expanding chars to shorts, rather than doing something intelligent. */
+
#if !defined (_WIN32) && !defined (__CYGWIN__)
/* Codepages mapped. */
static local_iconv_map codepages[] =
{
- { 0, "MS-ANSI" },
+ { 0, "cp1252" },
{ 1, "WINDOWS-1252" },
{ 437, "MS-ANSI" },
{ 737, "MS-GREEK" },
{ 1258, "WINDOWS-1258" },
{ CP_UTF7, "UTF-7" },
{ CP_UTF8, "UTF-8" },
- { CP_UTF16, "UTF-16" },
+ { CP_UTF16, "UTF-16LE" },
{ (rc_uint_type) -1, NULL }
};
{ 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
{ 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
{ 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
- { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokmål)", "Norway" },
+ { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
{ 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
{ 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
{ 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
#endif
+/* Specifies the default codepage to be used for unicode
+ transformations. By default this is CP_ACP. */
+rc_uint_type wind_default_codepage = CP_ACP;
+
+/* Specifies the currently used codepage for unicode
+ transformations. By default this is CP_ACP. */
+rc_uint_type wind_current_codepage = CP_ACP;
+
/* Convert an ASCII string to a unicode string. We just copy it,
expanding chars to shorts, rather than doing something intelligent. */
void
unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
{
- unicode_from_codepage (length, unicode, ascii, 0 /*CP_ACP*/);
+ unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
+}
+
+/* Convert an ASCII string with length A_LENGTH to a unicode string using
+ the current codepage. Where that conversion yields nothing, bytes are
+ copied one at a time, expanding chars to shorts.
+ This routine also converts \0 characters embedded within the string. */
+
+void
+unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
+{
+ char *tmp, *p;
+ rc_uint_type tlen, elen, idx = 0;
+
+ *unicode = NULL;
+
+ if (!a_length)
+ {
+ if (length)
+ *length = 0;
+ return;
+ }
+
+ /* Make sure we have a zero-terminated string. */
+ p = tmp = (char *) xmalloc (a_length + 1);
+ memcpy (tmp, ascii, a_length);
+ tmp[a_length] = 0;
+
+ while (a_length > 0)
+ {
+ unichar *utmp, *up;
+
+ tlen = strlen (p);
+
+ if (tlen > a_length)
+ tlen = a_length;
+ if (*p == 0)
+ {
+ /* Make room for one more character. */
+ utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
+ if (idx > 0)
+ {
+ memcpy (utmp, *unicode, idx * sizeof (unichar));
+ }
+ *unicode = utmp;
+ utmp[idx++] = 0;
+ --a_length;
+ p++;
+ continue;
+ }
+ utmp = NULL;
+ elen = 0;
+ elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
+ if (elen)
+ {
+ utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
+ wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
+ elen /= sizeof (unichar);
+ elen --;
+ }
+ else
+ {
+ /* Make room for one more character. */
+ utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
+ if (idx > 0)
+ {
+ memcpy (utmp, *unicode, idx * sizeof (unichar));
+ }
+ *unicode = utmp;
+ utmp[idx++] = ((unichar) *p) & 0xff;
+ --a_length;
+ p++;
+ continue;
+ }
+ p += tlen;
+ a_length -= tlen;
+
+ up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
+ if (idx > 0)
+ memcpy (up, *unicode, idx * sizeof (unichar));
+
+ *unicode = up;
+ if (elen)
+ memcpy (&up[idx], utmp, sizeof (unichar) * elen);
+
+ idx += elen;
+ }
+
+ if (length)
+ *length = idx;
+
+ free (tmp);
}
/* Convert an unicode string to an ASCII string. We just copy it,
void
ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
{
- codepage_from_unicode (length, unicode, ascii, 0/*CP_ACP*/);
+ codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
}
/* Print the unicode string UNICODE to the file E. LENGTH is the
else if ((ch & 0xff) == ch)
fprintf (e, "\\%03o", (unsigned int) ch);
else
- fprintf (e, "\\x%x", (unsigned int) ch);
+ fprintf (e, "\\x%04x", (unsigned int) ch);
}
}
*length = len;
}
-#ifdef HAVE_ICONV_H
+#if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
static int
-iconv_onechar (iconv_t cd, const char *s, char *d, int d_len, const char **n_s, char **n_d)
+iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
{
int i;
for (i = 1; i <= 32; i++)
{
char *tmp_d = d;
- const char *tmp_s = s;
+ ICONV_CONST char *tmp_s = s;
size_t ret;
size_t s_left = (size_t) i;
size_t d_left = (size_t) d_len;
return NULL;
return lim->iconv_name;
}
-#endif /* HAVE_ICONV_H */
+#endif /* HAVE_ICONV */
static rc_uint_type
wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
rc_uint_type ret = 0;
#if defined (_WIN32) || defined (__CYGWIN__)
- ret = (rc_uint_type) MultiByteToWideChar (cp, MB_PRECOMPOSED,
+ rc_uint_type conv_flags = MB_PRECOMPOSED;
+
+ /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
+ MultiByteToWideChar will set the last error to
+ ERROR_INVALID_FLAGS if we pass it. */
+ if (cp == CP_UTF8 || cp == CP_UTF7)
+ conv_flags = 0;
+
+ ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
mb, -1, u, u_len);
/* Convert to bytes. */
ret *= sizeof (unichar);
-#elif defined (HAVE_ICONV_H)
+#elif defined (HAVE_ICONV)
int first = 1;
char tmp[32];
char *p_tmp;
if (!mb || !iconv_name)
return 0;
- iconv_t cd = iconv_open ("UTF-16", iconv_name);
+ iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
while (1)
{
int iret;
- const char *n_mb;
- char *n_tmp;
+ const char *n_mb = "";
+ char *n_tmp = "";
p_tmp = tmp;
- iret = iconv_onechar (cd, (const char *) mb, p_tmp, 32, & n_mb, & n_tmp);
+ iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
if (first)
{
first = 0;
ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
NULL, & used_def);
-#elif defined (HAVE_ICONV_H)
+#elif defined (HAVE_ICONV)
int first = 1;
char tmp[32];
char *p_tmp;
if (!u || !iconv_name)
return 0;
- iconv_t cd = iconv_open (iconv_name, "UTF-16");
+ iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
while (1)
{
int iret;
- const char *n_u;
- char *n_tmp;
+ const char *n_u = "";
+ char *n_tmp = "";
p_tmp = tmp;
- iret = iconv_onechar (cd, (const char *) u, p_tmp, 32, &n_u, & n_tmp);
+ iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
if (first)
{
first = 0;