Commit | Line | Data |
---|---|---|
252b5132 | 1 | /* winduni.c -- unicode support for the windres program. |
250d07de | 2 | Copyright (C) 1997-2021 Free Software Foundation, Inc. |
252b5132 | 3 | Written by Ian Lance Taylor, Cygnus Support. |
4a594fce | 4 | Rewritten by Kai Tietz, Onevision. |
252b5132 RH |
5 | |
6 | This file is part of GNU Binutils. | |
7 | ||
8 | This program is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
32866df7 | 10 | the Free Software Foundation; either version 3 of the License, or |
252b5132 RH |
11 | (at your option) any later version. |
12 | ||
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with this program; if not, write to the Free Software | |
b43b5d5f NC |
20 | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA |
21 | 02110-1301, USA. */ | |
252b5132 | 22 | |
32866df7 | 23 | |
252b5132 RH |
24 | /* This file contains unicode support routines for the windres |
25 | program. Ideally, we would have generic unicode support which | |
26 | would work on all systems. However, we don't. Instead, on a | |
27 | Windows host, we are prepared to call some Windows routines. This | |
28 | means that we will generate different output on Windows and Unix | |
29 | hosts, but that seems better than not really supporting unicode at | |
30 | all. */ | |
31 | ||
3db64b00 | 32 | #include "sysdep.h" |
252b5132 | 33 | #include "bfd.h" |
5ba684e2 | 34 | #include "libiberty.h" /* for xstrdup */ |
4a594fce | 35 | #include "bucomm.h" |
5ba684e2 NC |
36 | /* Must be include before windows.h and winnls.h. */ |
37 | #if defined (_WIN32) || defined (__CYGWIN__) | |
38 | #include <windows.h> | |
39 | #include <winnls.h> | |
40 | #endif | |
252b5132 | 41 | #include "winduni.h" |
3882b010 | 42 | #include "safe-ctype.h" |
252b5132 | 43 | |
6f4c2146 | 44 | #if HAVE_ICONV |
5ba684e2 | 45 | #include <iconv.h> |
252b5132 RH |
46 | #endif |
47 | ||
5ba684e2 NC |
48 | static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type); |
49 | static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type); | |
4a594fce NC |
50 | static int unichar_isascii (const unichar *, rc_uint_type); |
51 | ||
d856f2dd NC |
52 | /* Convert an ASCII string to a unicode string. We just copy it, |
53 | expanding chars to shorts, rather than doing something intelligent. */ | |
a2b903b7 | 54 | |
5ba684e2 NC |
55 | #if !defined (_WIN32) && !defined (__CYGWIN__) |
56 | ||
57 | /* Codepages mapped. */ | |
58 | static local_iconv_map codepages[] = | |
59 | { | |
54387c7f | 60 | { 0, "cp1252" }, |
5ba684e2 NC |
61 | { 1, "WINDOWS-1252" }, |
62 | { 437, "MS-ANSI" }, | |
63 | { 737, "MS-GREEK" }, | |
64 | { 775, "WINBALTRIM" }, | |
65 | { 850, "MS-ANSI" }, | |
66 | { 852, "MS-EE" }, | |
67 | { 857, "MS-TURK" }, | |
68 | { 862, "CP862" }, | |
69 | { 864, "CP864" }, | |
70 | { 866, "MS-CYRL" }, | |
71 | { 874, "WINDOWS-874" }, | |
72 | { 932, "CP932" }, | |
73 | { 936, "CP936" }, | |
74 | { 949, "CP949" }, | |
75 | { 950, "CP950" }, | |
76 | { 1250, "WINDOWS-1250" }, | |
77 | { 1251, "WINDOWS-1251" }, | |
78 | { 1252, "WINDOWS-1252" }, | |
79 | { 1253, "WINDOWS-1253" }, | |
80 | { 1254, "WINDOWS-1254" }, | |
81 | { 1255, "WINDOWS-1255" }, | |
82 | { 1256, "WINDOWS-1256" }, | |
83 | { 1257, "WINDOWS-1257" }, | |
84 | { 1258, "WINDOWS-1258" }, | |
85 | { CP_UTF7, "UTF-7" }, | |
86 | { CP_UTF8, "UTF-8" }, | |
7e93ea4b | 87 | { CP_UTF16, "UTF-16LE" }, |
5ba684e2 NC |
88 | { (rc_uint_type) -1, NULL } |
89 | }; | |
90 | ||
91 | /* Languages supported. */ | |
92 | static const wind_language_t languages[] = | |
93 | { | |
94 | { 0x0000, 437, 1252, "Neutral", "Neutral" }, | |
95 | { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" }, | |
96 | { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" }, | |
97 | { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" }, | |
98 | { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" }, | |
99 | { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" }, | |
100 | { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" }, | |
101 | { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" }, | |
102 | { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" }, | |
103 | { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" }, | |
a2b903b7 | 104 | { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" }, |
5ba684e2 NC |
105 | { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" }, |
106 | { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" }, | |
107 | { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" }, | |
108 | { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" }, | |
109 | { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" }, | |
110 | { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" }, | |
111 | { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" }, | |
112 | { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" }, | |
113 | { 0x0427, 775, 1257, "Lithuanian", "Lithuania" }, | |
114 | { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" }, | |
115 | { 0x042D, 850, 1252, "Basque", "Spain" }, | |
116 | { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" }, | |
117 | { 0x0436, 850, 1252, "Afrikaans", "South Africa" }, | |
118 | { 0x0438, 850, 1252, "Faroese", "Faroe Islands" }, | |
119 | { 0x043C, 437, 1252, "Irish", "Ireland" }, | |
120 | { 0x043E, 850, 1252, "Malay", "Malaysia" }, | |
121 | { 0x0801, 864, 1256, "Arabic", "Iraq" }, | |
122 | { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" }, | |
123 | { 0x0807, 850, 1252, "German", "Switzerland" }, | |
124 | { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" }, | |
125 | { 0x080C, 850, 1252, "French", "Belgium" }, | |
126 | { 0x0810, 850, 1252, "Italian", "Switzerland" }, | |
127 | { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" }, | |
128 | { 0x0816, 850, 1252, "Portuguese", "Portugal" }, | |
129 | { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" }, | |
130 | { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" }, | |
131 | { 0x0C01, 864, 1256, "Arabic", "Egypt" }, | |
132 | { 0x0C04, 950, 950, "Chinese", "Hong Kong" }, | |
133 | { 0x0C07, 850, 1252, "German", "Austria" }, | |
134 | { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" }, | |
135 | { 0x0C0C, 850, 1252, "French", "Canada"}, | |
136 | { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" }, | |
137 | { 0x1001, 864, 1256, "Arabic", "Libya" }, | |
138 | { 0x1004, 936, 936, "Chinese", "Singapore" }, | |
139 | { 0x1007, 850, 1252, "German", "Luxembourg" }, | |
140 | { 0x1009, 850, 1252, "English", "Canada" }, | |
141 | { 0x100A, 850, 1252, "Spanish", "Guatemala" }, | |
142 | { 0x100C, 850, 1252, "French", "Switzerland" }, | |
143 | { 0x1401, 864, 1256, "Arabic", "Algeria" }, | |
144 | { 0x1407, 850, 1252, "German", "Liechtenstein" }, | |
145 | { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" }, | |
146 | { 0x140C, 850, 1252, "French", "Luxembourg" }, | |
147 | { 0x1801, 864, 1256, "Arabic", "Morocco" }, | |
148 | { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" }, | |
149 | { 0x180C, 850, 1252, "French", "Monaco" }, | |
150 | { 0x1C01, 864, 1256, "Arabic", "Tunisia" }, | |
151 | { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" }, | |
152 | { 0x2001, 864, 1256, "Arabic", "Oman" }, | |
153 | { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" }, | |
154 | { 0x2401, 864, 1256, "Arabic", "Yemen" }, | |
155 | { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" }, | |
156 | { 0x2801, 864, 1256, "Arabic", "Syria" }, | |
157 | { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" }, | |
158 | { 0x2C01, 864, 1256, "Arabic", "Jordan" }, | |
159 | { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" }, | |
160 | { 0x3001, 864, 1256, "Arabic", "Lebanon" }, | |
161 | { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" }, | |
162 | { 0x3401, 864, 1256, "Arabic", "Kuwait" }, | |
163 | { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" }, | |
164 | { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" }, | |
165 | { 0x380A, 850, 1252, "Spanish", "Uruguay" }, | |
166 | { 0x3C01, 864, 1256, "Arabic", "Bahrain" }, | |
167 | { 0x3C0A, 850, 1252, "Spanish", "Paraguay" }, | |
168 | { 0x4001, 864, 1256, "Arabic", "Qatar" }, | |
169 | { 0x400A, 850, 1252, "Spanish", "Bolivia" }, | |
170 | { 0x440A, 850, 1252, "Spanish", "El Salvador" }, | |
171 | { 0x480A, 850, 1252, "Spanish", "Honduras" }, | |
172 | { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" }, | |
173 | { 0x500A, 850, 1252, "Spanish", "Puerto Rico" }, | |
174 | { (unsigned) -1, 0, 0, NULL, NULL } | |
175 | }; | |
176 | ||
177 | #endif | |
178 | ||
d856f2dd NC |
179 | /* Specifies the default codepage to be used for unicode |
180 | transformations. By default this is CP_ACP. */ | |
181 | rc_uint_type wind_default_codepage = CP_ACP; | |
182 | ||
183 | /* Specifies the currently used codepage for unicode | |
184 | transformations. By default this is CP_ACP. */ | |
185 | rc_uint_type wind_current_codepage = CP_ACP; | |
186 | ||
252b5132 RH |
187 | /* Convert an ASCII string to a unicode string. We just copy it, |
188 | expanding chars to shorts, rather than doing something intelligent. */ | |
189 | ||
190 | void | |
4a594fce | 191 | unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii) |
252b5132 | 192 | { |
d856f2dd | 193 | unicode_from_codepage (length, unicode, ascii, wind_current_codepage); |
4a594fce NC |
194 | } |
195 | ||
81472056 KT |
196 | /* Convert an ASCII string with length A_LENGTH to a unicode string. We just |
197 | copy it, expanding chars to shorts, rather than doing something intelligent. | |
198 | This routine converts also \0 within a string. */ | |
199 | ||
200 | void | |
201 | unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length) | |
202 | { | |
203 | char *tmp, *p; | |
204 | rc_uint_type tlen, elen, idx = 0; | |
205 | ||
206 | *unicode = NULL; | |
207 | ||
208 | if (!a_length) | |
209 | { | |
210 | if (length) | |
211 | *length = 0; | |
212 | return; | |
213 | } | |
214 | ||
215 | /* Make sure we have zero terminated string. */ | |
e1fa0163 | 216 | p = tmp = (char *) xmalloc (a_length + 1); |
81472056 KT |
217 | memcpy (tmp, ascii, a_length); |
218 | tmp[a_length] = 0; | |
219 | ||
220 | while (a_length > 0) | |
221 | { | |
222 | unichar *utmp, *up; | |
223 | ||
224 | tlen = strlen (p); | |
225 | ||
226 | if (tlen > a_length) | |
227 | tlen = a_length; | |
228 | if (*p == 0) | |
229 | { | |
230 | /* Make room for one more character. */ | |
231 | utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); | |
232 | if (idx > 0) | |
233 | { | |
234 | memcpy (utmp, *unicode, idx * sizeof (unichar)); | |
235 | } | |
236 | *unicode = utmp; | |
237 | utmp[idx++] = 0; | |
238 | --a_length; | |
239 | p++; | |
240 | continue; | |
241 | } | |
242 | utmp = NULL; | |
243 | elen = 0; | |
244 | elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0); | |
245 | if (elen) | |
246 | { | |
247 | utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2)); | |
248 | wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen); | |
249 | elen /= sizeof (unichar); | |
250 | elen --; | |
251 | } | |
252 | else | |
253 | { | |
254 | /* Make room for one more character. */ | |
255 | utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); | |
256 | if (idx > 0) | |
257 | { | |
258 | memcpy (utmp, *unicode, idx * sizeof (unichar)); | |
259 | } | |
260 | *unicode = utmp; | |
261 | utmp[idx++] = ((unichar) *p) & 0xff; | |
262 | --a_length; | |
263 | p++; | |
264 | continue; | |
265 | } | |
266 | p += tlen; | |
267 | a_length -= tlen; | |
268 | ||
269 | up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen)); | |
270 | if (idx > 0) | |
271 | memcpy (up, *unicode, idx * sizeof (unichar)); | |
272 | ||
273 | *unicode = up; | |
274 | if (elen) | |
275 | memcpy (&up[idx], utmp, sizeof (unichar) * elen); | |
276 | ||
277 | idx += elen; | |
278 | } | |
279 | ||
280 | if (length) | |
281 | *length = idx; | |
e1fa0163 NC |
282 | |
283 | free (tmp); | |
81472056 KT |
284 | } |
285 | ||
4a594fce NC |
286 | /* Convert an unicode string to an ASCII string. We just copy it, |
287 | shrink shorts to chars, rather than doing something intelligent. | |
288 | Shorts with not within the char range are replaced by '_'. */ | |
289 | ||
290 | void | |
291 | ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii) | |
292 | { | |
d856f2dd | 293 | codepage_from_unicode (length, unicode, ascii, wind_current_codepage); |
252b5132 RH |
294 | } |
295 | ||
296 | /* Print the unicode string UNICODE to the file E. LENGTH is the | |
297 | number of characters to print, or -1 if we should print until the | |
298 | end of the string. FIXME: On a Windows host, we should be calling | |
299 | some Windows function, probably WideCharToMultiByte. */ | |
300 | ||
301 | void | |
4a594fce | 302 | unicode_print (FILE *e, const unichar *unicode, rc_uint_type length) |
252b5132 RH |
303 | { |
304 | while (1) | |
305 | { | |
306 | unichar ch; | |
307 | ||
308 | if (length == 0) | |
309 | return; | |
4a594fce | 310 | if ((bfd_signed_vma) length > 0) |
252b5132 RH |
311 | --length; |
312 | ||
313 | ch = *unicode; | |
314 | ||
4a594fce | 315 | if (ch == 0 && (bfd_signed_vma) length < 0) |
252b5132 RH |
316 | return; |
317 | ||
318 | ++unicode; | |
319 | ||
320 | if ((ch & 0x7f) == ch) | |
321 | { | |
322 | if (ch == '\\') | |
4a594fce NC |
323 | fputs ("\\\\", e); |
324 | else if (ch == '"') | |
325 | fputs ("\"\"", e); | |
3882b010 | 326 | else if (ISPRINT (ch)) |
252b5132 RH |
327 | putc (ch, e); |
328 | else | |
329 | { | |
330 | switch (ch) | |
331 | { | |
332 | case ESCAPE_A: | |
333 | fputs ("\\a", e); | |
334 | break; | |
335 | ||
336 | case ESCAPE_B: | |
337 | fputs ("\\b", e); | |
338 | break; | |
339 | ||
340 | case ESCAPE_F: | |
341 | fputs ("\\f", e); | |
342 | break; | |
343 | ||
344 | case ESCAPE_N: | |
345 | fputs ("\\n", e); | |
346 | break; | |
347 | ||
348 | case ESCAPE_R: | |
349 | fputs ("\\r", e); | |
350 | break; | |
351 | ||
352 | case ESCAPE_T: | |
353 | fputs ("\\t", e); | |
354 | break; | |
355 | ||
356 | case ESCAPE_V: | |
357 | fputs ("\\v", e); | |
358 | break; | |
359 | ||
360 | default: | |
361 | fprintf (e, "\\%03o", (unsigned int) ch); | |
362 | break; | |
363 | } | |
364 | } | |
365 | } | |
366 | else if ((ch & 0xff) == ch) | |
367 | fprintf (e, "\\%03o", (unsigned int) ch); | |
368 | else | |
d856f2dd | 369 | fprintf (e, "\\x%04x", (unsigned int) ch); |
252b5132 RH |
370 | } |
371 | } | |
4a594fce NC |
372 | |
373 | /* Print a unicode string to a file. */ | |
5ba684e2 | 374 | |
4a594fce NC |
375 | void |
376 | ascii_print (FILE *e, const char *s, rc_uint_type length) | |
377 | { | |
378 | while (1) | |
379 | { | |
380 | char ch; | |
381 | ||
382 | if (length == 0) | |
383 | return; | |
384 | if ((bfd_signed_vma) length > 0) | |
385 | --length; | |
386 | ||
387 | ch = *s; | |
388 | ||
389 | if (ch == 0 && (bfd_signed_vma) length < 0) | |
390 | return; | |
391 | ||
392 | ++s; | |
393 | ||
394 | if ((ch & 0x7f) == ch) | |
395 | { | |
396 | if (ch == '\\') | |
397 | fputs ("\\\\", e); | |
398 | else if (ch == '"') | |
399 | fputs ("\"\"", e); | |
400 | else if (ISPRINT (ch)) | |
401 | putc (ch, e); | |
402 | else | |
403 | { | |
404 | switch (ch) | |
405 | { | |
406 | case ESCAPE_A: | |
407 | fputs ("\\a", e); | |
408 | break; | |
409 | ||
410 | case ESCAPE_B: | |
411 | fputs ("\\b", e); | |
412 | break; | |
413 | ||
414 | case ESCAPE_F: | |
415 | fputs ("\\f", e); | |
416 | break; | |
417 | ||
418 | case ESCAPE_N: | |
419 | fputs ("\\n", e); | |
420 | break; | |
421 | ||
422 | case ESCAPE_R: | |
423 | fputs ("\\r", e); | |
424 | break; | |
425 | ||
426 | case ESCAPE_T: | |
427 | fputs ("\\t", e); | |
428 | break; | |
429 | ||
430 | case ESCAPE_V: | |
431 | fputs ("\\v", e); | |
432 | break; | |
433 | ||
434 | default: | |
435 | fprintf (e, "\\%03o", (unsigned int) ch); | |
436 | break; | |
437 | } | |
438 | } | |
439 | } | |
440 | else | |
441 | fprintf (e, "\\%03o", (unsigned int) ch & 0xff); | |
442 | } | |
443 | } | |
444 | ||
445 | rc_uint_type | |
446 | unichar_len (const unichar *unicode) | |
447 | { | |
448 | rc_uint_type r = 0; | |
5ba684e2 | 449 | |
4a594fce NC |
450 | if (unicode) |
451 | while (unicode[r] != 0) | |
452 | r++; | |
453 | else | |
454 | --r; | |
455 | return r; | |
456 | } | |
457 | ||
458 | unichar * | |
459 | unichar_dup (const unichar *unicode) | |
460 | { | |
461 | unichar *r; | |
462 | int len; | |
463 | ||
464 | if (! unicode) | |
465 | return NULL; | |
466 | for (len = 0; unicode[len] != 0; ++len) | |
467 | ; | |
468 | ++len; | |
469 | r = ((unichar *) res_alloc (len * sizeof (unichar))); | |
470 | memcpy (r, unicode, len * sizeof (unichar)); | |
471 | return r; | |
472 | } | |
473 | ||
474 | unichar * | |
475 | unichar_dup_uppercase (const unichar *u) | |
476 | { | |
477 | unichar *r = unichar_dup (u); | |
478 | int i; | |
479 | ||
480 | if (! r) | |
481 | return NULL; | |
482 | ||
483 | for (i = 0; r[i] != 0; ++i) | |
484 | { | |
485 | if (r[i] >= 'a' && r[i] <= 'z') | |
486 | r[i] &= 0xdf; | |
487 | } | |
488 | return r; | |
489 | } | |
490 | ||
491 | static int | |
492 | unichar_isascii (const unichar *u, rc_uint_type len) | |
493 | { | |
494 | rc_uint_type i; | |
5ba684e2 | 495 | |
4a594fce NC |
496 | if ((bfd_signed_vma) len < 0) |
497 | { | |
498 | if (u) | |
499 | len = (rc_uint_type) unichar_len (u); | |
500 | else | |
501 | len = 0; | |
502 | } | |
503 | ||
504 | for (i = 0; i < len; i++) | |
505 | if ((u[i] & 0xff80) != 0) | |
506 | return 0; | |
507 | return 1; | |
508 | } | |
509 | ||
510 | void | |
511 | unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len) | |
512 | { | |
513 | if (! unichar_isascii (u, len)) | |
514 | fputc ('L', e); | |
515 | fputc ('"', e); | |
516 | unicode_print (e, u, len); | |
517 | fputc ('"', e); | |
518 | } | |
5ba684e2 NC |
519 | |
520 | int | |
521 | unicode_is_valid_codepage (rc_uint_type cp) | |
522 | { | |
523 | if ((cp & 0xffff) != cp) | |
524 | return 0; | |
525 | if (cp == CP_UTF16 || cp == CP_ACP) | |
526 | return 1; | |
527 | ||
528 | #if !defined (_WIN32) && !defined (__CYGWIN__) | |
529 | if (! wind_find_codepage_info (cp)) | |
530 | return 0; | |
531 | return 1; | |
532 | #else | |
533 | return !! IsValidCodePage ((UINT) cp); | |
534 | #endif | |
535 | } | |
536 | ||
537 | #if defined (_WIN32) || defined (__CYGWIN__) | |
538 | ||
539 | #define max_cp_string_len 6 | |
540 | ||
541 | static unsigned int | |
542 | codepage_from_langid (unsigned short langid) | |
543 | { | |
544 | char cp_string [max_cp_string_len]; | |
545 | int c; | |
546 | ||
547 | memset (cp_string, 0, max_cp_string_len); | |
548 | /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, | |
549 | but is unavailable on Win95. */ | |
550 | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), | |
551 | LOCALE_IDEFAULTANSICODEPAGE, | |
552 | cp_string, max_cp_string_len); | |
553 | /* If codepage data for an LCID is not installed on users's system, | |
554 | GetLocaleInfo returns an empty string. Fall back to system ANSI | |
555 | default. */ | |
556 | if (c == 0) | |
557 | return CP_ACP; | |
558 | return strtoul (cp_string, 0, 10); | |
559 | } | |
560 | ||
561 | static unsigned int | |
562 | wincodepage_from_langid (unsigned short langid) | |
563 | { | |
564 | char cp_string [max_cp_string_len]; | |
565 | int c; | |
566 | ||
567 | memset (cp_string, 0, max_cp_string_len); | |
568 | /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, | |
569 | but is unavailable on Win95. */ | |
570 | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), | |
571 | LOCALE_IDEFAULTCODEPAGE, | |
572 | cp_string, max_cp_string_len); | |
573 | /* If codepage data for an LCID is not installed on users's system, | |
574 | GetLocaleInfo returns an empty string. Fall back to system ANSI | |
575 | default. */ | |
576 | if (c == 0) | |
577 | return CP_OEM; | |
578 | return strtoul (cp_string, 0, 10); | |
579 | } | |
580 | ||
581 | static char * | |
582 | lang_from_langid (unsigned short langid) | |
583 | { | |
584 | char cp_string[261]; | |
585 | int c; | |
586 | ||
587 | memset (cp_string, 0, 261); | |
588 | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), | |
589 | LOCALE_SENGLANGUAGE, | |
590 | cp_string, 260); | |
591 | /* If codepage data for an LCID is not installed on users's system, | |
592 | GetLocaleInfo returns an empty string. Fall back to system ANSI | |
593 | default. */ | |
594 | if (c == 0) | |
595 | strcpy (cp_string, "Neutral"); | |
596 | return xstrdup (cp_string); | |
597 | } | |
598 | ||
599 | static char * | |
600 | country_from_langid (unsigned short langid) | |
601 | { | |
602 | char cp_string[261]; | |
603 | int c; | |
604 | ||
605 | memset (cp_string, 0, 261); | |
606 | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), | |
607 | LOCALE_SENGCOUNTRY, | |
608 | cp_string, 260); | |
609 | /* If codepage data for an LCID is not installed on users's system, | |
610 | GetLocaleInfo returns an empty string. Fall back to system ANSI | |
611 | default. */ | |
612 | if (c == 0) | |
613 | strcpy (cp_string, "Neutral"); | |
614 | return xstrdup (cp_string); | |
615 | } | |
616 | ||
617 | #endif | |
618 | ||
619 | const wind_language_t * | |
620 | wind_find_language_by_id (unsigned id) | |
621 | { | |
622 | #if !defined (_WIN32) && !defined (__CYGWIN__) | |
623 | int i; | |
624 | ||
625 | if (! id) | |
626 | return NULL; | |
627 | for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++) | |
628 | ; | |
629 | if (languages[i].id == id) | |
630 | return &languages[i]; | |
631 | return NULL; | |
632 | #else | |
633 | static wind_language_t wl; | |
634 | ||
635 | wl.id = id; | |
636 | wl.doscp = codepage_from_langid ((unsigned short) id); | |
637 | wl.wincp = wincodepage_from_langid ((unsigned short) id); | |
638 | wl.name = lang_from_langid ((unsigned short) id); | |
639 | wl.country = country_from_langid ((unsigned short) id); | |
640 | ||
641 | return & wl; | |
642 | #endif | |
643 | } | |
644 | ||
645 | const local_iconv_map * | |
646 | wind_find_codepage_info (unsigned cp) | |
647 | { | |
648 | #if !defined (_WIN32) && !defined (__CYGWIN__) | |
649 | int i; | |
650 | ||
651 | for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++) | |
652 | ; | |
653 | if (codepages[i].codepage == (rc_uint_type) -1) | |
654 | return NULL; | |
655 | return &codepages[i]; | |
656 | #else | |
657 | static local_iconv_map lim; | |
658 | if (!unicode_is_valid_codepage (cp)) | |
659 | return NULL; | |
660 | lim.codepage = cp; | |
661 | lim.iconv_name = ""; | |
662 | return & lim; | |
663 | #endif | |
664 | } | |
665 | ||
666 | /* Convert an Codepage string to a unicode string. */ | |
667 | ||
668 | void | |
669 | unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp) | |
670 | { | |
671 | rc_uint_type len; | |
672 | ||
673 | len = wind_MultiByteToWideChar (cp, src, NULL, 0); | |
674 | if (len) | |
675 | { | |
676 | *u = ((unichar *) res_alloc (len)); | |
677 | wind_MultiByteToWideChar (cp, src, *u, len); | |
678 | } | |
679 | /* Discount the trailing '/0'. If MultiByteToWideChar failed, | |
680 | this will set *length to -1. */ | |
681 | len -= sizeof (unichar); | |
682 | ||
683 | if (length != NULL) | |
684 | *length = len / sizeof (unichar); | |
685 | } | |
686 | ||
687 | /* Convert an unicode string to an codepage string. */ | |
688 | ||
689 | void | |
690 | codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp) | |
691 | { | |
692 | rc_uint_type len; | |
693 | ||
694 | len = wind_WideCharToMultiByte (cp, unicode, NULL, 0); | |
695 | if (len) | |
696 | { | |
697 | *ascii = (char *) res_alloc (len * sizeof (char)); | |
698 | wind_WideCharToMultiByte (cp, unicode, *ascii, len); | |
699 | } | |
700 | /* Discount the trailing '/0'. If MultiByteToWideChar failed, | |
701 | this will set *length to -1. */ | |
702 | len--; | |
703 | ||
704 | if (length != NULL) | |
705 | *length = len; | |
706 | } | |
707 | ||
34e8a6f7 | 708 | #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__) |
5ba684e2 | 709 | static int |
6f4c2146 | 710 | iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d) |
5ba684e2 NC |
711 | { |
712 | int i; | |
713 | ||
714 | for (i = 1; i <= 32; i++) | |
715 | { | |
716 | char *tmp_d = d; | |
6f4c2146 | 717 | ICONV_CONST char *tmp_s = s; |
5ba684e2 NC |
718 | size_t ret; |
719 | size_t s_left = (size_t) i; | |
720 | size_t d_left = (size_t) d_len; | |
721 | ||
722 | ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left); | |
723 | ||
724 | if (ret != (size_t) -1) | |
725 | { | |
726 | *n_s = tmp_s; | |
727 | *n_d = tmp_d; | |
728 | return 0; | |
729 | } | |
730 | } | |
731 | ||
732 | return 1; | |
733 | } | |
734 | ||
735 | static const char * | |
736 | wind_iconv_cp (rc_uint_type cp) | |
737 | { | |
738 | const local_iconv_map *lim = wind_find_codepage_info (cp); | |
739 | ||
740 | if (!lim) | |
741 | return NULL; | |
742 | return lim->iconv_name; | |
743 | } | |
6f4c2146 | 744 | #endif /* HAVE_ICONV */ |
5ba684e2 NC |
745 | |
746 | static rc_uint_type | |
747 | wind_MultiByteToWideChar (rc_uint_type cp, const char *mb, | |
748 | unichar *u, rc_uint_type u_len) | |
749 | { | |
750 | rc_uint_type ret = 0; | |
751 | ||
752 | #if defined (_WIN32) || defined (__CYGWIN__) | |
b882b028 NC |
753 | rc_uint_type conv_flags = MB_PRECOMPOSED; |
754 | ||
a2b903b7 | 755 | /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8. |
b882b028 NC |
756 | MultiByteToWideChar will set the last error to |
757 | ERROR_INVALID_FLAGS if we do. */ | |
758 | if (cp == CP_UTF8 || cp == CP_UTF7) | |
759 | conv_flags = 0; | |
760 | ||
761 | ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags, | |
5ba684e2 NC |
762 | mb, -1, u, u_len); |
763 | /* Convert to bytes. */ | |
764 | ret *= sizeof (unichar); | |
765 | ||
6f4c2146 | 766 | #elif defined (HAVE_ICONV) |
5ba684e2 NC |
767 | int first = 1; |
768 | char tmp[32]; | |
769 | char *p_tmp; | |
770 | const char *iconv_name = wind_iconv_cp (cp); | |
771 | ||
772 | if (!mb || !iconv_name) | |
773 | return 0; | |
7e93ea4b | 774 | iconv_t cd = iconv_open ("UTF-16LE", iconv_name); |
5ba684e2 NC |
775 | |
776 | while (1) | |
777 | { | |
778 | int iret; | |
6f4c2146 NC |
779 | const char *n_mb = ""; |
780 | char *n_tmp = ""; | |
5ba684e2 NC |
781 | |
782 | p_tmp = tmp; | |
6f4c2146 | 783 | iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp); |
5ba684e2 NC |
784 | if (first) |
785 | { | |
786 | first = 0; | |
787 | continue; | |
788 | } | |
789 | if (!iret) | |
790 | { | |
791 | size_t l_tmp = (size_t) (n_tmp - p_tmp); | |
792 | ||
793 | if (u) | |
794 | { | |
795 | if ((size_t) u_len < l_tmp) | |
796 | break; | |
797 | memcpy (u, tmp, l_tmp); | |
798 | u += l_tmp/2; | |
799 | u_len -= l_tmp; | |
800 | } | |
801 | ret += l_tmp; | |
802 | } | |
803 | else | |
804 | break; | |
805 | if (tmp[0] == 0 && tmp[1] == 0) | |
806 | break; | |
807 | mb = n_mb; | |
808 | } | |
809 | iconv_close (cd); | |
810 | #else | |
811 | if (cp) | |
812 | ret = 0; | |
813 | ret = strlen (mb) + 1; | |
814 | ret *= sizeof (unichar); | |
815 | if (u != NULL && u_len != 0) | |
816 | { | |
817 | do | |
818 | { | |
819 | *u++ = ((unichar) *mb) & 0xff; | |
820 | --u_len; mb++; | |
821 | } | |
822 | while (u_len != 0 && mb[-1] != 0); | |
823 | } | |
824 | if (u != NULL && u_len != 0) | |
825 | *u = 0; | |
826 | #endif | |
827 | return ret; | |
828 | } | |
829 | ||
830 | static rc_uint_type | |
831 | wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len) | |
832 | { | |
833 | rc_uint_type ret = 0; | |
834 | #if defined (_WIN32) || defined (__CYGWIN__) | |
835 | WINBOOL used_def = FALSE; | |
836 | ||
837 | ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len, | |
838 | NULL, & used_def); | |
6f4c2146 | 839 | #elif defined (HAVE_ICONV) |
5ba684e2 NC |
840 | int first = 1; |
841 | char tmp[32]; | |
842 | char *p_tmp; | |
843 | const char *iconv_name = wind_iconv_cp (cp); | |
844 | ||
845 | if (!u || !iconv_name) | |
846 | return 0; | |
7e93ea4b | 847 | iconv_t cd = iconv_open (iconv_name, "UTF-16LE"); |
5ba684e2 NC |
848 | |
849 | while (1) | |
850 | { | |
851 | int iret; | |
6f4c2146 NC |
852 | const char *n_u = ""; |
853 | char *n_tmp = ""; | |
5ba684e2 NC |
854 | |
855 | p_tmp = tmp; | |
6f4c2146 | 856 | iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp); |
5ba684e2 NC |
857 | if (first) |
858 | { | |
859 | first = 0; | |
860 | continue; | |
861 | } | |
862 | if (!iret) | |
863 | { | |
864 | size_t l_tmp = (size_t) (n_tmp - p_tmp); | |
865 | ||
866 | if (mb) | |
867 | { | |
868 | if ((size_t) mb_len < l_tmp) | |
869 | break; | |
870 | memcpy (mb, tmp, l_tmp); | |
871 | mb += l_tmp; | |
872 | mb_len -= l_tmp; | |
873 | } | |
874 | ret += l_tmp; | |
875 | } | |
876 | else | |
877 | break; | |
878 | if (u[0] == 0) | |
879 | break; | |
880 | u = (const unichar *) n_u; | |
881 | } | |
882 | iconv_close (cd); | |
883 | #else | |
884 | if (cp) | |
885 | ret = 0; | |
886 | ||
887 | while (u[ret] != 0) | |
888 | ++ret; | |
889 | ||
890 | ++ret; | |
891 | ||
892 | if (mb) | |
893 | { | |
894 | while (*u != 0 && mb_len != 0) | |
895 | { | |
896 | if (u[0] == (u[0] & 0x7f)) | |
897 | *mb++ = (char) u[0]; | |
898 | else | |
899 | *mb++ = '_'; | |
900 | ++u; --mb_len; | |
901 | } | |
902 | if (mb_len != 0) | |
903 | *mb = 0; | |
904 | } | |
905 | #endif | |
906 | return ret; | |
907 | } |