1 /* winduni.c -- unicode support for the windres program.
2 Copyright 1997, 1998, 2000, 2001, 2003, 2007
3 Free Software Foundation, Inc.
4 Written by Ian Lance Taylor, Cygnus Support.
5 Rewritten by Kai Tietz, Onevision.
7 This file is part of GNU Binutils.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 /* This file contains unicode support routines for the windres
25 program. Ideally, we would have generic unicode support which
26 would work on all systems. However, we don't. Instead, on a
27 Windows host, we are prepared to call some Windows routines. This
28 means that we will generate different output on Windows and Unix
29 hosts, but that seems better than not really supporting unicode at
34 #include "libiberty.h" /* for xstrdup */
36 /* Must be included before windows.h and winnls.h. */
37 #if defined (_WIN32) || defined (__CYGWIN__)
42 #include "safe-ctype.h"
48 static rc_uint_type
wind_WideCharToMultiByte (rc_uint_type
, const unichar
*, char *, rc_uint_type
);
49 static rc_uint_type
wind_MultiByteToWideChar (rc_uint_type
, const char *, unichar
*, rc_uint_type
);
52 static int unichar_isascii (const unichar
*, rc_uint_type
);
54 #if !defined (_WIN32) && !defined (__CYGWIN__)
56 /* Codepages mapped. */
/* Table pairing a numeric code page with an encoding name. It is only
   compiled when neither _WIN32 nor __CYGWIN__ is defined. The last entry,
   with code page (rc_uint_type) -1 and a NULL name, is the sentinel that
   wind_find_codepage_info looks for when scanning the table.
   NOTE(review): the string member is presumably the iconv_name field that
   wind_iconv_cp returns -- confirm against the local_iconv_map
   declaration, which is not visible here. */
57 static local_iconv_map codepages
[] =
60 { 1, "WINDOWS-1252" },
63 { 775, "WINBALTRIM" },
70 { 874, "WINDOWS-874" },
75 { 1250, "WINDOWS-1250" },
76 { 1251, "WINDOWS-1251" },
77 { 1252, "WINDOWS-1252" },
78 { 1253, "WINDOWS-1253" },
79 { 1254, "WINDOWS-1254" },
80 { 1255, "WINDOWS-1255" },
81 { 1256, "WINDOWS-1256" },
82 { 1257, "WINDOWS-1257" },
83 { 1258, "WINDOWS-1258" },
86 { CP_UTF16
, "UTF-16" },
87 { (rc_uint_type
) -1, NULL
}
90 /* Languages supported. */
/* Static table of language entries. Each initializer holds a numeric
   language id, two code page numbers and two strings; the final entry,
   with id (unsigned) -1 and NULL strings, is the sentinel that
   wind_find_language_by_id stops at.
   NOTE(review): the member order is presumably id, doscp, wincp, name,
   country (the fields wind_find_language_by_id refers to) -- confirm
   against the wind_language_t declaration, which is not visible here. */
91 static const wind_language_t languages
[] =
93 { 0x0000, 437, 1252, "Neutral", "Neutral" },
94 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
95 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
96 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
97 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
98 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
99 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
100 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
101 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
102 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
103 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokmål)", "Norway" },
104 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
105 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
106 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
107 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
108 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
109 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
110 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
111 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
112 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
113 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
114 { 0x042D, 850, 1252, "Basque", "Spain" },
115 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
116 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
117 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
118 { 0x043C, 437, 1252, "Irish", "Ireland" },
119 { 0x043E, 850, 1252, "Malay", "Malaysia" },
120 { 0x0801, 864, 1256, "Arabic", "Iraq" },
121 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
122 { 0x0807, 850, 1252, "German", "Switzerland" },
123 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
124 { 0x080C, 850, 1252, "French", "Belgium" },
125 { 0x0810, 850, 1252, "Italian", "Switzerland" },
126 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
127 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
128 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
129 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
130 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
131 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
132 { 0x0C07, 850, 1252, "German", "Austria" },
133 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
134 { 0x0C0C, 850, 1252, "French", "Canada"},
135 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
136 { 0x1001, 864, 1256, "Arabic", "Libya" },
137 { 0x1004, 936, 936, "Chinese", "Singapore" },
138 { 0x1007, 850, 1252, "German", "Luxembourg" },
139 { 0x1009, 850, 1252, "English", "Canada" },
140 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
141 { 0x100C, 850, 1252, "French", "Switzerland" },
142 { 0x1401, 864, 1256, "Arabic", "Algeria" },
143 { 0x1407, 850, 1252, "German", "Liechtenstein" },
144 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
145 { 0x140C, 850, 1252, "French", "Luxembourg" },
146 { 0x1801, 864, 1256, "Arabic", "Morocco" },
147 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
148 { 0x180C, 850, 1252, "French", "Monaco" },
149 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
150 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
151 { 0x2001, 864, 1256, "Arabic", "Oman" },
152 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
153 { 0x2401, 864, 1256, "Arabic", "Yemen" },
154 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
155 { 0x2801, 864, 1256, "Arabic", "Syria" },
156 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
157 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
158 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
159 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
160 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
161 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
162 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
163 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
164 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
165 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
166 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
167 { 0x4001, 864, 1256, "Arabic", "Qatar" },
168 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
169 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
170 { 0x480A, 850, 1252, "Spanish", "Honduras" },
171 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
172 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
173 { (unsigned) -1, 0, 0, NULL
, NULL
}
178 /* Convert an ASCII string to a unicode string. We just copy it,
179 expanding chars to shorts, rather than doing something intelligent. */
/* The visible call forwards LENGTH, UNICODE and ASCII unchanged to
   unicode_from_codepage with code page 0 (CP_ACP), so the conversion is
   whatever that routine does for that code page. */
182 unicode_from_ascii (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
)
184 unicode_from_codepage (length
, unicode
, ascii
, 0 /*CP_ACP*/);
187 /* Convert a unicode string to an ASCII string. We just copy it,
188 shrink shorts to chars, rather than doing something intelligent.
189 Shorts not within the char range are replaced by '_'. */
/* The visible call forwards LENGTH, UNICODE and ASCII unchanged to
   codepage_from_unicode with code page 0 (CP_ACP), so the conversion is
   whatever that routine does for that code page. */
192 ascii_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
)
194 codepage_from_unicode (length
, unicode
, ascii
, 0/*CP_ACP*/);
197 /* Print the unicode string UNICODE to the file E. LENGTH is the
198 number of characters to print, or -1 if we should print until the
199 end of the string. FIXME: On a Windows host, we should be calling
200 some Windows function, probably WideCharToMultiByte. */
/* NOTE(review): only part of the body is visible here. LENGTH is an
   unsigned rc_uint_type, so it is cast to bfd_signed_vma for the sign
   tests that implement the -1 convention; a zero character is only
   treated specially when that signed length is negative. Characters are
   classified with (ch & 0x7f) == ch (fits in 7 bits) and
   (ch & 0xff) == ch (fits in 8 bits). The visible output calls write a
   three-digit octal escape ("\\%03o") or, in the last call, a
   hexadecimal escape ("\\x%x"). */
203 unicode_print (FILE *e
, const unichar
*unicode
, rc_uint_type length
)
211 if ((bfd_signed_vma
) length
> 0)
216 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
221 if ((ch
& 0x7f) == ch
)
227 else if (ISPRINT (ch
))
262 fprintf (e
, "\\%03o", (unsigned int) ch
);
267 else if ((ch
& 0xff) == ch
)
268 fprintf (e
, "\\%03o", (unsigned int) ch
);
270 fprintf (e
, "\\x%x", (unsigned int) ch
);
274 /* Print the char string S to the file E. */
/* NOTE(review): only part of the body is visible here. LENGTH is cast to
   bfd_signed_vma for its sign tests, and a zero character is only
   treated specially when that signed length is negative. Characters that
   satisfy (ch & 0x7f) == ch are tested with ISPRINT; both visible output
   calls write a three-digit octal escape ("\\%03o"), and the last one
   masks the value with 0xff first. */
277 ascii_print (FILE *e
, const char *s
, rc_uint_type length
)
285 if ((bfd_signed_vma
) length
> 0)
290 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
295 if ((ch
& 0x7f) == ch
)
301 else if (ISPRINT (ch
))
336 fprintf (e
, "\\%03o", (unsigned int) ch
);
342 fprintf (e
, "\\%03o", (unsigned int) ch
& 0xff);
/* Scan UNICODE with the index r for as long as the element at r is not
   zero. NOTE(review): the statements that advance and return r are not
   visible here; presumably the result is the number of elements before
   the zero terminator. */
347 unichar_len (const unichar
*unicode
)
352 while (unicode
[r
] != 0)
/* Copy the zero-terminated unichar string UNICODE into memory obtained
   from res_alloc. The loop advances len to the index of the zero
   terminator; the allocation and the memcpy then both use
   len * sizeof (unichar) bytes.
   NOTE(review): lines between the loop and the allocation are not
   visible here, so whether len is adjusted to include the terminator
   cannot be confirmed from this fragment. */
360 unichar_dup (const unichar
*unicode
)
367 for (len
= 0; unicode
[len
] != 0; ++len
)
370 r
= ((unichar
*) res_alloc (len
* sizeof (unichar
)));
371 memcpy (r
, unicode
, len
* sizeof (unichar
));
/* Duplicate U with unichar_dup, then walk the copy up to its zero
   terminator looking at elements in the range 'a' .. 'z'. Only that
   ASCII range is tested. NOTE(review): the statement applied to matching
   elements is not visible here; the function name suggests they are
   converted to upper case. */
376 unichar_dup_uppercase (const unichar
*u
)
378 unichar
*r
= unichar_dup (u
);
384 for (i
= 0; r
[i
] != 0; ++i
)
386 if (r
[i
] >= 'a' && r
[i
] <= 'z')
/* Examine the first LEN elements of U. When LEN is negative once cast
   to bfd_signed_vma, it is replaced by unichar_len (U). Each element is
   tested with the mask 0xff80, i.e. for any bit set above the low seven.
   NOTE(review): the return statements are not visible here; presumably
   an element with such a bit set makes the result false. */
393 unichar_isascii (const unichar
*u
, rc_uint_type len
)
397 if ((bfd_signed_vma
) len
< 0)
400 len
= (rc_uint_type
) unichar_len (u
);
405 for (i
= 0; i
< len
; i
++)
406 if ((u
[i
] & 0xff80) != 0)
/* Write the LEN-element string U to the file E through unicode_print,
   after first testing it with unichar_isascii.
   NOTE(review): what is emitted when the string is not pure ASCII, and
   any surrounding quote characters, are on lines not visible here. */
412 unicode_print_quoted (FILE *e
, const unichar
*u
, rc_uint_type len
)
414 if (! unichar_isascii (u
, len
))
417 unicode_print (e
, u
, len
);
/* Decide whether CP is a usable code page number. The first test singles
   out values that do not fit in 16 bits; the second singles out CP_UTF16
   and CP_ACP. When neither _WIN32 nor __CYGWIN__ is defined, CP is
   looked up with wind_find_codepage_info; the other visible path returns
   the result of IsValidCodePage normalised to 0 or 1 by the double
   negation.
   NOTE(review): the value returned by each early test and the #else
   line separating the two host paths are not visible here. */
422 unicode_is_valid_codepage (rc_uint_type cp
)
424 if ((cp
& 0xffff) != cp
)
426 if (cp
== CP_UTF16
|| cp
== CP_ACP
)
429 #if !defined (_WIN32) && !defined (__CYGWIN__)
430 if (! wind_find_codepage_info (cp
))
434 return !! IsValidCodePage ((UINT
) cp
);
438 #if defined (_WIN32) || defined (__CYGWIN__)
440 #define max_cp_string_len 6
/* Return the code page number for LANGID. The function asks
   GetLocaleInfoA for LOCALE_IDEFAULTANSICODEPAGE of the locale
   MAKELCID (LANGID, SORT_DEFAULT), receiving the answer as text in a
   zero-filled buffer of max_cp_string_len bytes, and converts that text
   with strtoul in base 10. */
443 codepage_from_langid (unsigned short langid
)
445 char cp_string
[max_cp_string_len
];
448 memset (cp_string
, 0, max_cp_string_len
);
449 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
450 but is unavailable on Win95. */
451 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
452 LOCALE_IDEFAULTANSICODEPAGE
,
453 cp_string
, max_cp_string_len
);
454 /* If codepage data for an LCID is not installed on user's system,
455 GetLocaleInfo returns an empty string. Fall back to system ANSI
459 return strtoul (cp_string
, 0, 10);
/* Return the code page number for LANGID. The function asks
   GetLocaleInfoA for LOCALE_IDEFAULTCODEPAGE of the locale
   MAKELCID (LANGID, SORT_DEFAULT), receiving the answer as text in a
   zero-filled buffer of max_cp_string_len bytes, and converts that text
   with strtoul in base 10. */
463 wincodepage_from_langid (unsigned short langid
)
465 char cp_string
[max_cp_string_len
];
468 memset (cp_string
, 0, max_cp_string_len
);
469 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
470 but is unavailable on Win95. */
471 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
472 LOCALE_IDEFAULTCODEPAGE
,
473 cp_string
, max_cp_string_len
);
474 /* If codepage data for an LCID is not installed on user's system,
475 GetLocaleInfo returns an empty string. Fall back to system ANSI
479 return strtoul (cp_string
, 0, 10);
/* Return a string for LANGID as an xstrdup copy of the local cp_string
   buffer. The buffer is cleared over 261 bytes and is the subject of a
   GetLocaleInfoA query for the locale MAKELCID (LANGID, SORT_DEFAULT).
   NOTE(review): the remaining GetLocaleInfoA arguments and the condition
   under which "Neutral" is copied into the buffer are not visible
   here. */
483 lang_from_langid (unsigned short langid
)
488 memset (cp_string
, 0, 261);
489 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
492 /* If codepage data for an LCID is not installed on user's system,
493 GetLocaleInfo returns an empty string. Fall back to system ANSI
496 strcpy (cp_string
, "Neutral");
497 return xstrdup (cp_string
);
/* Return a string for LANGID as an xstrdup copy of the local cp_string
   buffer. The buffer is cleared over 261 bytes and is the subject of a
   GetLocaleInfoA query for the locale MAKELCID (LANGID, SORT_DEFAULT).
   NOTE(review): the remaining GetLocaleInfoA arguments and the condition
   under which "Neutral" is copied into the buffer are not visible
   here. */
501 country_from_langid (unsigned short langid
)
506 memset (cp_string
, 0, 261);
507 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
510 /* If codepage data for an LCID is not installed on user's system,
511 GetLocaleInfo returns an empty string. Fall back to system ANSI
514 strcpy (cp_string
, "Neutral");
515 return xstrdup (cp_string
);
/* Look up the language description for ID.
   When neither _WIN32 nor __CYGWIN__ is defined, the languages table is
   scanned linearly until either the (unsigned) -1 sentinel or an entry
   whose id equals ID is reached; a matching entry is returned by
   address.
   The other visible path fills the function-local static wl from
   codepage_from_langid, wincodepage_from_langid, lang_from_langid and
   country_from_langid, each called with ID truncated to unsigned short.
   Because wl is static, that storage is reused on every call.
   NOTE(review): the result for an unknown ID on the table path and the
   remaining wl assignments are on lines not visible here. */
520 const wind_language_t
*
521 wind_find_language_by_id (unsigned id
)
523 #if !defined (_WIN32) && !defined (__CYGWIN__)
528 for (i
= 0; languages
[i
].id
!= (unsigned) -1 && languages
[i
].id
!= id
; i
++)
530 if (languages
[i
].id
== id
)
531 return &languages
[i
];
534 static wind_language_t wl
;
537 wl
.doscp
= codepage_from_langid ((unsigned short) id
);
538 wl
.wincp
= wincodepage_from_langid ((unsigned short) id
);
539 wl
.name
= lang_from_langid ((unsigned short) id
);
540 wl
.country
= country_from_langid ((unsigned short) id
);
/* Look up the mapping entry for code page CP.
   When neither _WIN32 nor __CYGWIN__ is defined, the codepages table is
   scanned linearly until either the (rc_uint_type) -1 sentinel or an
   entry whose codepage equals CP is reached; the sentinel case is tested
   separately before the entry's address is returned.
   The other visible path uses the function-local static lim and checks
   CP with unicode_is_valid_codepage.
   NOTE(review): the value returned when the sentinel is reached or CP is
   invalid, and how lim is filled in, are on lines not visible here. */
546 const local_iconv_map
*
547 wind_find_codepage_info (unsigned cp
)
549 #if !defined (_WIN32) && !defined (__CYGWIN__)
552 for (i
= 0; codepages
[i
].codepage
!= (rc_uint_type
) -1 && codepages
[i
].codepage
!= cp
; i
++)
554 if (codepages
[i
].codepage
== (rc_uint_type
) -1)
556 return &codepages
[i
];
558 static local_iconv_map lim
;
559 if (!unicode_is_valid_codepage (cp
))
567 /* Convert a codepage string to a unicode string. */
/* SRC is converted using code page CP. The first
   wind_MultiByteToWideChar call, with a NULL buffer and size 0, obtains
   the required size in len; res_alloc (len) provides the buffer stored
   in *U and the second call fills it. len is treated as a byte count:
   one unichar is subtracted for the terminator and the remainder is
   divided by sizeof (unichar) to give the element count stored in
   *LENGTH. */
570 unicode_from_codepage (rc_uint_type
*length
, unichar
**u
, const char *src
, rc_uint_type cp
)
574 len
= wind_MultiByteToWideChar (cp
, src
, NULL
, 0);
577 *u
= ((unichar
*) res_alloc (len
));
578 wind_MultiByteToWideChar (cp
, src
, *u
, len
);
580 /* Discount the trailing '\0'. If MultiByteToWideChar failed,
581 this will set *length to -1. */
582 len
-= sizeof (unichar
);
585 *length
= len
/ sizeof (unichar
);
588 /* Convert a unicode string to a codepage string. */
/* UNICODE is converted using code page CP. The first
   wind_WideCharToMultiByte call, with a NULL buffer and size 0, obtains
   the required size in len; res_alloc provides len chars for the buffer
   stored in *ASCII and the second call fills it.
   NOTE(review): the statements that adjust len and store *LENGTH are
   not visible here. */
591 codepage_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
, rc_uint_type cp
)
595 len
= wind_WideCharToMultiByte (cp
, unicode
, NULL
, 0);
598 *ascii
= (char *) res_alloc (len
* sizeof (char));
599 wind_WideCharToMultiByte (cp
, unicode
, *ascii
, len
);
601 /* Discount the trailing '\0'. If WideCharToMultiByte failed,
602 this will set *length to -1. */
/* Call iconv on the descriptor CD with a growing amount of input taken
   from S. For each i from 1 to 32 the source pointer is reset to S, the
   input size is set to i bytes and the output space to D_LEN bytes, and
   the result is compared with the (size_t) -1 failure value.
   NOTE(review): the handling of a successful or failed conversion, the
   initialisation of tmp_d, and the use of N_S and N_D are on lines not
   visible here; presumably N_S and N_D receive the advanced source and
   destination pointers. */
611 iconv_onechar (iconv_t cd
, const char *s
, char *d
, int d_len
, const char **n_s
, char **n_d
)
615 for (i
= 1; i
<= 32; i
++)
618 const char *tmp_s
= s
;
620 size_t s_left
= (size_t) i
;
621 size_t d_left
= (size_t) d_len
;
623 ret
= iconv (cd
, & tmp_s
, & s_left
, & tmp_d
, & d_left
);
625 if (ret
!= (size_t) -1)
/* Return the iconv_name member of the entry that
   wind_find_codepage_info yields for code page CP.
   NOTE(review): any handling of a missing entry sits on lines not
   visible here. */
637 wind_iconv_cp (rc_uint_type cp
)
639 const local_iconv_map
*lim
= wind_find_codepage_info (cp
);
643 return lim
->iconv_name
;
645 #endif /* HAVE_ICONV_H */
/* Convert the char string MB, interpreted in code page CP, into unichar
   elements stored in U, which has room for U_LEN bytes. ret starts at 0
   and is computed as a byte count on the paths whose arithmetic is
   visible. Three host-dependent paths are visible:
   - _WIN32 or __CYGWIN__: MultiByteToWideChar is called with
     MB_PRECOMPOSED and its result is multiplied by sizeof (unichar).
   - HAVE_ICONV_H: the encoding name comes from wind_iconv_cp, and a
     descriptor converting from it to "UTF-16" is opened. Input is
     converted one step at a time by iconv_onechar, using 32 bytes of
     temporary space. The visible memcpy copies l_tmp converted bytes
     into U, and the test on tmp[0] and tmp[1] detects a converted zero
     element.
   - otherwise: ret is (strlen (MB) + 1) * sizeof (unichar), and each
     source char is stored masked with 0xff while U_LEN is non-zero and
     the previously consumed char was not the terminator.
   NOTE(review): loop structure, error returns and the final return
   statement are on lines not visible here. */
648 wind_MultiByteToWideChar (rc_uint_type cp
, const char *mb
,
649 unichar
*u
, rc_uint_type u_len
)
651 rc_uint_type ret
= 0;
653 #if defined (_WIN32) || defined (__CYGWIN__)
654 ret
= (rc_uint_type
) MultiByteToWideChar (cp
, MB_PRECOMPOSED
,
656 /* Convert to bytes. */
657 ret
*= sizeof (unichar
);
659 #elif defined (HAVE_ICONV_H)
663 const char *iconv_name
= wind_iconv_cp (cp
);
665 if (!mb
|| !iconv_name
)
667 iconv_t cd
= iconv_open ("UTF-16", iconv_name
);
676 iret
= iconv_onechar (cd
, (const char *) mb
, p_tmp
, 32, & n_mb
, & n_tmp
);
684 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
688 if ((size_t) u_len
< l_tmp
)
690 memcpy (u
, tmp
, l_tmp
);
698 if (tmp
[0] == 0 && tmp
[1] == 0)
706 ret
= strlen (mb
) + 1;
707 ret
*= sizeof (unichar
);
708 if (u
!= NULL
&& u_len
!= 0)
712 *u
++ = ((unichar
) *mb
) & 0xff;
715 while (u_len
!= 0 && mb
[-1] != 0);
717 if (u
!= NULL
&& u_len
!= 0)
724 wind_WideCharToMultiByte (rc_uint_type cp
, const unichar
*u
, char *mb
, rc_uint_type mb_len
)
726 rc_uint_type ret
= 0;
727 #if defined (_WIN32) || defined (__CYGWIN__)
728 WINBOOL used_def
= FALSE
;
730 ret
= (rc_uint_type
) WideCharToMultiByte (cp
, 0, u
, -1, mb
, mb_len
,
732 #elif defined (HAVE_ICONV_H)
736 const char *iconv_name
= wind_iconv_cp (cp
);
738 if (!u
|| !iconv_name
)
740 iconv_t cd
= iconv_open (iconv_name
, "UTF-16");
749 iret
= iconv_onechar (cd
, (const char *) u
, p_tmp
, 32, &n_u
, & n_tmp
);
757 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
761 if ((size_t) mb_len
< l_tmp
)
763 memcpy (mb
, tmp
, l_tmp
);
773 u
= (const unichar
*) n_u
;
787 while (*u
!= 0 && mb_len
!= 0)
789 if (u
[0] == (u
[0] & 0x7f))