2011-10-25 Kai Tietz <ktietz@redhat.com>
[deliverable/binutils-gdb.git] / binutils / winduni.c
1 /* winduni.c -- unicode support for the windres program.
2 Copyright 1997, 1998, 2000, 2001, 2003, 2005, 2007, 2009
3 Free Software Foundation, Inc.
4 Written by Ian Lance Taylor, Cygnus Support.
5 Rewritten by Kai Tietz, Onevision.
6
7 This file is part of GNU Binutils.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
22 02110-1301, USA. */
23
24
25 /* This file contains unicode support routines for the windres
26 program. Ideally, we would have generic unicode support which
27 would work on all systems. However, we don't. Instead, on a
28 Windows host, we are prepared to call some Windows routines. This
29 means that we will generate different output on Windows and Unix
30 hosts, but that seems better than not really supporting unicode at
31 all. */
32
33 #include "sysdep.h"
34 #include "bfd.h"
35 #include "libiberty.h" /* for xstrdup */
36 #include "bucomm.h"
37 /* Must be include before windows.h and winnls.h. */
38 #if defined (_WIN32) || defined (__CYGWIN__)
39 #include <windows.h>
40 #include <winnls.h>
41 #endif
42 #include "winduni.h"
43 #include "safe-ctype.h"
44
45 #if HAVE_ICONV
46 #include <iconv.h>
47 #endif
48
49 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
50 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
51 static int unichar_isascii (const unichar *, rc_uint_type);
52
/* On hosts without the Windows NLS API, code pages and languages are
   looked up in the static tables below.  */
55
56 #if !defined (_WIN32) && !defined (__CYGWIN__)
57
/* Codepages mapped.  Each entry pairs a Windows code page number with
   the iconv encoding name used to convert it; wind_find_codepage_info
   searches this table on non-Windows hosts.  Several numbers share an
   encoding name (for example 0, 437 and 850 all use "MS-ANSI").  */
static local_iconv_map codepages[] =
{
  { 0, "MS-ANSI" },
  { 1, "WINDOWS-1252" },
  { 437, "MS-ANSI" },
  { 737, "MS-GREEK" },
  { 775, "WINBALTRIM" },
  { 850, "MS-ANSI" },
  { 852, "MS-EE" },
  { 857, "MS-TURK" },
  { 862, "CP862" },
  { 864, "CP864" },
  { 866, "MS-CYRL" },
  { 874, "WINDOWS-874" },
  { 932, "CP932" },
  { 936, "CP936" },
  { 949, "CP949" },
  { 950, "CP950" },
  { 1250, "WINDOWS-1250" },
  { 1251, "WINDOWS-1251" },
  { 1252, "WINDOWS-1252" },
  { 1253, "WINDOWS-1253" },
  { 1254, "WINDOWS-1254" },
  { 1255, "WINDOWS-1255" },
  { 1256, "WINDOWS-1256" },
  { 1257, "WINDOWS-1257" },
  { 1258, "WINDOWS-1258" },
  { CP_UTF7, "UTF-7" },
  { CP_UTF8, "UTF-8" },
  { CP_UTF16, "UTF-16" },
  /* Terminator: table searches stop at code page (rc_uint_type) -1.  */
  { (rc_uint_type) -1, NULL }
};
91
92 /* Languages supported. */
93 static const wind_language_t languages[] =
94 {
95 { 0x0000, 437, 1252, "Neutral", "Neutral" },
96 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
97 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
98 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
99 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
100 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
101 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
102 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
103 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
104 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
105 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokmål)", "Norway" },
106 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
107 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
108 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
109 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
110 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
111 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
112 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
113 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
114 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
115 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
116 { 0x042D, 850, 1252, "Basque", "Spain" },
117 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
118 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
119 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
120 { 0x043C, 437, 1252, "Irish", "Ireland" },
121 { 0x043E, 850, 1252, "Malay", "Malaysia" },
122 { 0x0801, 864, 1256, "Arabic", "Iraq" },
123 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
124 { 0x0807, 850, 1252, "German", "Switzerland" },
125 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
126 { 0x080C, 850, 1252, "French", "Belgium" },
127 { 0x0810, 850, 1252, "Italian", "Switzerland" },
128 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
129 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
130 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
131 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
132 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
133 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
134 { 0x0C07, 850, 1252, "German", "Austria" },
135 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
136 { 0x0C0C, 850, 1252, "French", "Canada"},
137 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
138 { 0x1001, 864, 1256, "Arabic", "Libya" },
139 { 0x1004, 936, 936, "Chinese", "Singapore" },
140 { 0x1007, 850, 1252, "German", "Luxembourg" },
141 { 0x1009, 850, 1252, "English", "Canada" },
142 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
143 { 0x100C, 850, 1252, "French", "Switzerland" },
144 { 0x1401, 864, 1256, "Arabic", "Algeria" },
145 { 0x1407, 850, 1252, "German", "Liechtenstein" },
146 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
147 { 0x140C, 850, 1252, "French", "Luxembourg" },
148 { 0x1801, 864, 1256, "Arabic", "Morocco" },
149 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
150 { 0x180C, 850, 1252, "French", "Monaco" },
151 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
152 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
153 { 0x2001, 864, 1256, "Arabic", "Oman" },
154 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
155 { 0x2401, 864, 1256, "Arabic", "Yemen" },
156 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
157 { 0x2801, 864, 1256, "Arabic", "Syria" },
158 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
159 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
160 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
161 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
162 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
163 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
164 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
165 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
166 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
167 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
168 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
169 { 0x4001, 864, 1256, "Arabic", "Qatar" },
170 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
171 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
172 { 0x480A, 850, 1252, "Spanish", "Honduras" },
173 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
174 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
175 { (unsigned) -1, 0, 0, NULL, NULL }
176 };
177
178 #endif
179
/* Specifies the default codepage to be used for unicode
   transformations.  By default this is CP_ACP.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  By default this is CP_ACP.  This is the codepage
   that unicode_from_ascii, unicode_from_ascii_len and
   ascii_from_unicode convert with.  */
rc_uint_type wind_current_codepage = CP_ACP;
187
/* Convert the NUL-terminated string ASCII to a unicode string, using
   the current codepage (wind_current_codepage).  This is a thin
   wrapper: LENGTH and UNICODE are handed unchanged to
   unicode_from_codepage, which fills them in.  */

void
unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
{
  unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
}
196
197 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
198 copy it, expanding chars to shorts, rather than doing something intelligent.
199 This routine converts also \0 within a string. */
200
201 void
202 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
203 {
204 char *tmp, *p;
205 rc_uint_type tlen, elen, idx = 0;
206
207 *unicode = NULL;
208
209 if (!a_length)
210 {
211 if (length)
212 *length = 0;
213 return;
214 }
215
216 /* Make sure we have zero terminated string. */
217 p = tmp = (char *) alloca (a_length + 1);
218 memcpy (tmp, ascii, a_length);
219 tmp[a_length] = 0;
220
221 while (a_length > 0)
222 {
223 unichar *utmp, *up;
224
225 tlen = strlen (p);
226
227 if (tlen > a_length)
228 tlen = a_length;
229 if (*p == 0)
230 {
231 /* Make room for one more character. */
232 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
233 if (idx > 0)
234 {
235 memcpy (utmp, *unicode, idx * sizeof (unichar));
236 }
237 *unicode = utmp;
238 utmp[idx++] = 0;
239 --a_length;
240 p++;
241 continue;
242 }
243 utmp = NULL;
244 elen = 0;
245 elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
246 if (elen)
247 {
248 utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
249 wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
250 elen /= sizeof (unichar);
251 elen --;
252 }
253 else
254 {
255 /* Make room for one more character. */
256 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
257 if (idx > 0)
258 {
259 memcpy (utmp, *unicode, idx * sizeof (unichar));
260 }
261 *unicode = utmp;
262 utmp[idx++] = ((unichar) *p) & 0xff;
263 --a_length;
264 p++;
265 continue;
266 }
267 p += tlen;
268 a_length -= tlen;
269
270 up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
271 if (idx > 0)
272 memcpy (up, *unicode, idx * sizeof (unichar));
273
274 *unicode = up;
275 if (elen)
276 memcpy (&up[idx], utmp, sizeof (unichar) * elen);
277
278 idx += elen;
279 }
280
281 if (length)
282 *length = idx;
283 }
284
/* Convert the unicode string UNICODE to a multibyte string in the
   current codepage (wind_current_codepage).  This is a thin wrapper:
   LENGTH and ASCII are handed unchanged to codepage_from_unicode,
   which fills them in.  */

void
ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
{
  codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
}
294
295 /* Print the unicode string UNICODE to the file E. LENGTH is the
296 number of characters to print, or -1 if we should print until the
297 end of the string. FIXME: On a Windows host, we should be calling
298 some Windows function, probably WideCharToMultiByte. */
299
300 void
301 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
302 {
303 while (1)
304 {
305 unichar ch;
306
307 if (length == 0)
308 return;
309 if ((bfd_signed_vma) length > 0)
310 --length;
311
312 ch = *unicode;
313
314 if (ch == 0 && (bfd_signed_vma) length < 0)
315 return;
316
317 ++unicode;
318
319 if ((ch & 0x7f) == ch)
320 {
321 if (ch == '\\')
322 fputs ("\\\\", e);
323 else if (ch == '"')
324 fputs ("\"\"", e);
325 else if (ISPRINT (ch))
326 putc (ch, e);
327 else
328 {
329 switch (ch)
330 {
331 case ESCAPE_A:
332 fputs ("\\a", e);
333 break;
334
335 case ESCAPE_B:
336 fputs ("\\b", e);
337 break;
338
339 case ESCAPE_F:
340 fputs ("\\f", e);
341 break;
342
343 case ESCAPE_N:
344 fputs ("\\n", e);
345 break;
346
347 case ESCAPE_R:
348 fputs ("\\r", e);
349 break;
350
351 case ESCAPE_T:
352 fputs ("\\t", e);
353 break;
354
355 case ESCAPE_V:
356 fputs ("\\v", e);
357 break;
358
359 default:
360 fprintf (e, "\\%03o", (unsigned int) ch);
361 break;
362 }
363 }
364 }
365 else if ((ch & 0xff) == ch)
366 fprintf (e, "\\%03o", (unsigned int) ch);
367 else
368 fprintf (e, "\\x%04x", (unsigned int) ch);
369 }
370 }
371
372 /* Print a unicode string to a file. */
373
374 void
375 ascii_print (FILE *e, const char *s, rc_uint_type length)
376 {
377 while (1)
378 {
379 char ch;
380
381 if (length == 0)
382 return;
383 if ((bfd_signed_vma) length > 0)
384 --length;
385
386 ch = *s;
387
388 if (ch == 0 && (bfd_signed_vma) length < 0)
389 return;
390
391 ++s;
392
393 if ((ch & 0x7f) == ch)
394 {
395 if (ch == '\\')
396 fputs ("\\\\", e);
397 else if (ch == '"')
398 fputs ("\"\"", e);
399 else if (ISPRINT (ch))
400 putc (ch, e);
401 else
402 {
403 switch (ch)
404 {
405 case ESCAPE_A:
406 fputs ("\\a", e);
407 break;
408
409 case ESCAPE_B:
410 fputs ("\\b", e);
411 break;
412
413 case ESCAPE_F:
414 fputs ("\\f", e);
415 break;
416
417 case ESCAPE_N:
418 fputs ("\\n", e);
419 break;
420
421 case ESCAPE_R:
422 fputs ("\\r", e);
423 break;
424
425 case ESCAPE_T:
426 fputs ("\\t", e);
427 break;
428
429 case ESCAPE_V:
430 fputs ("\\v", e);
431 break;
432
433 default:
434 fprintf (e, "\\%03o", (unsigned int) ch);
435 break;
436 }
437 }
438 }
439 else
440 fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
441 }
442 }
443
444 rc_uint_type
445 unichar_len (const unichar *unicode)
446 {
447 rc_uint_type r = 0;
448
449 if (unicode)
450 while (unicode[r] != 0)
451 r++;
452 else
453 --r;
454 return r;
455 }
456
457 unichar *
458 unichar_dup (const unichar *unicode)
459 {
460 unichar *r;
461 int len;
462
463 if (! unicode)
464 return NULL;
465 for (len = 0; unicode[len] != 0; ++len)
466 ;
467 ++len;
468 r = ((unichar *) res_alloc (len * sizeof (unichar)));
469 memcpy (r, unicode, len * sizeof (unichar));
470 return r;
471 }
472
473 unichar *
474 unichar_dup_uppercase (const unichar *u)
475 {
476 unichar *r = unichar_dup (u);
477 int i;
478
479 if (! r)
480 return NULL;
481
482 for (i = 0; r[i] != 0; ++i)
483 {
484 if (r[i] >= 'a' && r[i] <= 'z')
485 r[i] &= 0xdf;
486 }
487 return r;
488 }
489
490 static int
491 unichar_isascii (const unichar *u, rc_uint_type len)
492 {
493 rc_uint_type i;
494
495 if ((bfd_signed_vma) len < 0)
496 {
497 if (u)
498 len = (rc_uint_type) unichar_len (u);
499 else
500 len = 0;
501 }
502
503 for (i = 0; i < len; i++)
504 if ((u[i] & 0xff80) != 0)
505 return 0;
506 return 1;
507 }
508
/* Print the unicode string U of length LEN to the file E as a
   double-quoted string, escaping its contents with unicode_print.
   The opening quote is preceded by 'L' when unichar_isascii reports
   that the string is not pure 7-bit.  */
void
unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
{
  if (! unichar_isascii (u, len))
    fputc ('L', e);
  fputc ('"', e);
  unicode_print (e, u, len);
  fputc ('"', e);
}
518
519 int
520 unicode_is_valid_codepage (rc_uint_type cp)
521 {
522 if ((cp & 0xffff) != cp)
523 return 0;
524 if (cp == CP_UTF16 || cp == CP_ACP)
525 return 1;
526
527 #if !defined (_WIN32) && !defined (__CYGWIN__)
528 if (! wind_find_codepage_info (cp))
529 return 0;
530 return 1;
531 #else
532 return !! IsValidCodePage ((UINT) cp);
533 #endif
534 }
535
536 #if defined (_WIN32) || defined (__CYGWIN__)
537
538 #define max_cp_string_len 6
539
540 static unsigned int
541 codepage_from_langid (unsigned short langid)
542 {
543 char cp_string [max_cp_string_len];
544 int c;
545
546 memset (cp_string, 0, max_cp_string_len);
547 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
548 but is unavailable on Win95. */
549 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
550 LOCALE_IDEFAULTANSICODEPAGE,
551 cp_string, max_cp_string_len);
552 /* If codepage data for an LCID is not installed on users's system,
553 GetLocaleInfo returns an empty string. Fall back to system ANSI
554 default. */
555 if (c == 0)
556 return CP_ACP;
557 return strtoul (cp_string, 0, 10);
558 }
559
560 static unsigned int
561 wincodepage_from_langid (unsigned short langid)
562 {
563 char cp_string [max_cp_string_len];
564 int c;
565
566 memset (cp_string, 0, max_cp_string_len);
567 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
568 but is unavailable on Win95. */
569 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
570 LOCALE_IDEFAULTCODEPAGE,
571 cp_string, max_cp_string_len);
572 /* If codepage data for an LCID is not installed on users's system,
573 GetLocaleInfo returns an empty string. Fall back to system ANSI
574 default. */
575 if (c == 0)
576 return CP_OEM;
577 return strtoul (cp_string, 0, 10);
578 }
579
580 static char *
581 lang_from_langid (unsigned short langid)
582 {
583 char cp_string[261];
584 int c;
585
586 memset (cp_string, 0, 261);
587 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
588 LOCALE_SENGLANGUAGE,
589 cp_string, 260);
590 /* If codepage data for an LCID is not installed on users's system,
591 GetLocaleInfo returns an empty string. Fall back to system ANSI
592 default. */
593 if (c == 0)
594 strcpy (cp_string, "Neutral");
595 return xstrdup (cp_string);
596 }
597
598 static char *
599 country_from_langid (unsigned short langid)
600 {
601 char cp_string[261];
602 int c;
603
604 memset (cp_string, 0, 261);
605 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
606 LOCALE_SENGCOUNTRY,
607 cp_string, 260);
608 /* If codepage data for an LCID is not installed on users's system,
609 GetLocaleInfo returns an empty string. Fall back to system ANSI
610 default. */
611 if (c == 0)
612 strcpy (cp_string, "Neutral");
613 return xstrdup (cp_string);
614 }
615
616 #endif
617
618 const wind_language_t *
619 wind_find_language_by_id (unsigned id)
620 {
621 #if !defined (_WIN32) && !defined (__CYGWIN__)
622 int i;
623
624 if (! id)
625 return NULL;
626 for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
627 ;
628 if (languages[i].id == id)
629 return &languages[i];
630 return NULL;
631 #else
632 static wind_language_t wl;
633
634 wl.id = id;
635 wl.doscp = codepage_from_langid ((unsigned short) id);
636 wl.wincp = wincodepage_from_langid ((unsigned short) id);
637 wl.name = lang_from_langid ((unsigned short) id);
638 wl.country = country_from_langid ((unsigned short) id);
639
640 return & wl;
641 #endif
642 }
643
644 const local_iconv_map *
645 wind_find_codepage_info (unsigned cp)
646 {
647 #if !defined (_WIN32) && !defined (__CYGWIN__)
648 int i;
649
650 for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
651 ;
652 if (codepages[i].codepage == (rc_uint_type) -1)
653 return NULL;
654 return &codepages[i];
655 #else
656 static local_iconv_map lim;
657 if (!unicode_is_valid_codepage (cp))
658 return NULL;
659 lim.codepage = cp;
660 lim.iconv_name = "";
661 return & lim;
662 #endif
663 }
664
665 /* Convert an Codepage string to a unicode string. */
666
667 void
668 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
669 {
670 rc_uint_type len;
671
672 len = wind_MultiByteToWideChar (cp, src, NULL, 0);
673 if (len)
674 {
675 *u = ((unichar *) res_alloc (len));
676 wind_MultiByteToWideChar (cp, src, *u, len);
677 }
678 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
679 this will set *length to -1. */
680 len -= sizeof (unichar);
681
682 if (length != NULL)
683 *length = len / sizeof (unichar);
684 }
685
686 /* Convert an unicode string to an codepage string. */
687
688 void
689 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
690 {
691 rc_uint_type len;
692
693 len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
694 if (len)
695 {
696 *ascii = (char *) res_alloc (len * sizeof (char));
697 wind_WideCharToMultiByte (cp, unicode, *ascii, len);
698 }
699 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
700 this will set *length to -1. */
701 len--;
702
703 if (length != NULL)
704 *length = len;
705 }
706
707 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
708 static int
709 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
710 {
711 int i;
712
713 for (i = 1; i <= 32; i++)
714 {
715 char *tmp_d = d;
716 ICONV_CONST char *tmp_s = s;
717 size_t ret;
718 size_t s_left = (size_t) i;
719 size_t d_left = (size_t) d_len;
720
721 ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
722
723 if (ret != (size_t) -1)
724 {
725 *n_s = tmp_s;
726 *n_d = tmp_d;
727 return 0;
728 }
729 }
730
731 return 1;
732 }
733
734 static const char *
735 wind_iconv_cp (rc_uint_type cp)
736 {
737 const local_iconv_map *lim = wind_find_codepage_info (cp);
738
739 if (!lim)
740 return NULL;
741 return lim->iconv_name;
742 }
743 #endif /* HAVE_ICONV */
744
745 static rc_uint_type
746 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
747 unichar *u, rc_uint_type u_len)
748 {
749 rc_uint_type ret = 0;
750
751 #if defined (_WIN32) || defined (__CYGWIN__)
752 rc_uint_type conv_flags = MB_PRECOMPOSED;
753
754 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
755 MultiByteToWideChar will set the last error to
756 ERROR_INVALID_FLAGS if we do. */
757 if (cp == CP_UTF8 || cp == CP_UTF7)
758 conv_flags = 0;
759
760 ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
761 mb, -1, u, u_len);
762 /* Convert to bytes. */
763 ret *= sizeof (unichar);
764
765 #elif defined (HAVE_ICONV)
766 int first = 1;
767 char tmp[32];
768 char *p_tmp;
769 const char *iconv_name = wind_iconv_cp (cp);
770
771 if (!mb || !iconv_name)
772 return 0;
773 iconv_t cd = iconv_open ("UTF-16", iconv_name);
774
775 while (1)
776 {
777 int iret;
778 const char *n_mb = "";
779 char *n_tmp = "";
780
781 p_tmp = tmp;
782 iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
783 if (first)
784 {
785 first = 0;
786 continue;
787 }
788 if (!iret)
789 {
790 size_t l_tmp = (size_t) (n_tmp - p_tmp);
791
792 if (u)
793 {
794 if ((size_t) u_len < l_tmp)
795 break;
796 memcpy (u, tmp, l_tmp);
797 u += l_tmp/2;
798 u_len -= l_tmp;
799 }
800 ret += l_tmp;
801 }
802 else
803 break;
804 if (tmp[0] == 0 && tmp[1] == 0)
805 break;
806 mb = n_mb;
807 }
808 iconv_close (cd);
809 #else
810 if (cp)
811 ret = 0;
812 ret = strlen (mb) + 1;
813 ret *= sizeof (unichar);
814 if (u != NULL && u_len != 0)
815 {
816 do
817 {
818 *u++ = ((unichar) *mb) & 0xff;
819 --u_len; mb++;
820 }
821 while (u_len != 0 && mb[-1] != 0);
822 }
823 if (u != NULL && u_len != 0)
824 *u = 0;
825 #endif
826 return ret;
827 }
828
829 static rc_uint_type
830 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
831 {
832 rc_uint_type ret = 0;
833 #if defined (_WIN32) || defined (__CYGWIN__)
834 WINBOOL used_def = FALSE;
835
836 ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
837 NULL, & used_def);
838 #elif defined (HAVE_ICONV)
839 int first = 1;
840 char tmp[32];
841 char *p_tmp;
842 const char *iconv_name = wind_iconv_cp (cp);
843
844 if (!u || !iconv_name)
845 return 0;
846 iconv_t cd = iconv_open (iconv_name, "UTF-16");
847
848 while (1)
849 {
850 int iret;
851 const char *n_u = "";
852 char *n_tmp = "";
853
854 p_tmp = tmp;
855 iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
856 if (first)
857 {
858 first = 0;
859 continue;
860 }
861 if (!iret)
862 {
863 size_t l_tmp = (size_t) (n_tmp - p_tmp);
864
865 if (mb)
866 {
867 if ((size_t) mb_len < l_tmp)
868 break;
869 memcpy (mb, tmp, l_tmp);
870 mb += l_tmp;
871 mb_len -= l_tmp;
872 }
873 ret += l_tmp;
874 }
875 else
876 break;
877 if (u[0] == 0)
878 break;
879 u = (const unichar *) n_u;
880 }
881 iconv_close (cd);
882 #else
883 if (cp)
884 ret = 0;
885
886 while (u[ret] != 0)
887 ++ret;
888
889 ++ret;
890
891 if (mb)
892 {
893 while (*u != 0 && mb_len != 0)
894 {
895 if (u[0] == (u[0] & 0x7f))
896 *mb++ = (char) u[0];
897 else
898 *mb++ = '_';
899 ++u; --mb_len;
900 }
901 if (mb_len != 0)
902 *mb = 0;
903 }
904 #endif
905 return ret;
906 }
This page took 0.061404 seconds and 5 git commands to generate.