Update Gnulib to the latest git version
[deliverable/binutils-gdb.git] / gnulib / import / localcharset.c
CommitLineData
8690e634
JK
1/* Determine a canonical name for the current locale's character encoding.
2
c0c3707f 3 Copyright (C) 2000-2006, 2008-2019 Free Software Foundation, Inc.
8690e634
JK
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
c0c3707f 16 with this program; if not, see <https://www.gnu.org/licenses/>. */
8690e634
JK
17
18/* Written by Bruno Haible <bruno@clisp.org>. */
19
20#include <config.h>
21
22/* Specification. */
23#include "localcharset.h"
24
8690e634
JK
25#include <stddef.h>
26#include <stdio.h>
27#include <string.h>
28#include <stdlib.h>
29
30#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
31# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
32#endif
33
c0c3707f 34#if defined _WIN32 && !defined __CYGWIN__
8690e634 35# define WINDOWS_NATIVE
4a626d0a 36# include <locale.h>
8690e634
JK
37#endif
38
39#if defined __EMX__
40/* Assume EMX program runs on OS/2, even if compiled under DOS. */
41# ifndef OS2
42# define OS2
43# endif
44#endif
45
46#if !defined WINDOWS_NATIVE
8690e634
JK
47# if HAVE_LANGINFO_CODESET
48# include <langinfo.h>
49# else
c0c3707f 50# if 0 /* see comment regarding use of setlocale(), below */
8690e634
JK
51# include <locale.h>
52# endif
53# endif
54# ifdef __CYGWIN__
55# define WIN32_LEAN_AND_MEAN
56# include <windows.h>
57# endif
58#elif defined WINDOWS_NATIVE
59# define WIN32_LEAN_AND_MEAN
60# include <windows.h>
61#endif
62#if defined OS2
63# define INCL_DOS
64# include <os2.h>
65#endif
66
4a626d0a
PA
67/* For MB_CUR_MAX_L */
68#if defined DARWIN7
69# include <xlocale.h>
70#endif
71
8690e634 72
c0c3707f 73#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
8690e634 74
c0c3707f
CB
75/* On these platforms, we use a mapping from non-canonical encoding name
76 to GNU canonical encoding name. */
8690e634 77
c0c3707f
CB
78/* With glibc-2.1 or newer, we don't need any canonicalization,
79 because glibc has iconv and both glibc and libiconv support all
80 GNU canonical names directly. */
81# if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
8690e634 82
/* One row of alias_table: maps an encoding name as reported by the platform
   to the corresponding GNU canonical encoding name.  Both strings are stored
   inline in fixed-size arrays, so each may be at most 11 characters long
   (plus the terminating NUL).  The table initializers are positional, so the
   field order must not change.  */
c0c3707f 83struct table_entry
8690e634 84{
c0c3707f
CB
/* Non-canonical, platform-specific encoding name; the lookup key.  */
85 const char alias[11+1];
/* GNU canonical encoding name that ALIAS resolves to.  */
86 const char canonical[11+1];
87};
88
89/* Table of platform-dependent mappings from the encoding name reported by
   the platform ('alias') to the GNU canonical encoding name ('canonical').
   Invariants to keep when editing:
   - Within each platform section the entries are sorted by 'alias' in
     ascending strcmp order; locale_charset looks entries up with a binary
     search that compares with strcmp.
   - Both strings must fit the 11-character fields of struct table_entry.
   - The last entry of a section carries no trailing comma, and every
     section defines alias_table_defined; when no section applies, the
     dummy entry at the end keeps the initializer syntactically valid.  */
90static const struct table_entry alias_table[] =
91 {
92# if defined __FreeBSD__ /* FreeBSD */
93 /*{ "ARMSCII-8", "ARMSCII-8" },*/
94 { "Big5", "BIG5" },
95 { "C", "ASCII" },
96 /*{ "CP1131", "CP1131" },*/
97 /*{ "CP1251", "CP1251" },*/
98 /*{ "CP866", "CP866" },*/
99 /*{ "GB18030", "GB18030" },*/
100 /*{ "GB2312", "GB2312" },*/
101 /*{ "GBK", "GBK" },*/
102 /*{ "ISCII-DEV", "?" },*/
103 { "ISO8859-1", "ISO-8859-1" },
104 { "ISO8859-13", "ISO-8859-13" },
105 { "ISO8859-15", "ISO-8859-15" },
106 { "ISO8859-2", "ISO-8859-2" },
107 { "ISO8859-5", "ISO-8859-5" },
108 { "ISO8859-7", "ISO-8859-7" },
109 { "ISO8859-9", "ISO-8859-9" },
110 /*{ "KOI8-R", "KOI8-R" },*/
111 /*{ "KOI8-U", "KOI8-U" },*/
112 { "SJIS", "SHIFT_JIS" },
113 { "US-ASCII", "ASCII" },
114 { "eucCN", "GB2312" },
115 { "eucJP", "EUC-JP" },
116 { "eucKR", "EUC-KR" }
117# define alias_table_defined
118# endif
119# if defined __NetBSD__ /* NetBSD */
120 { "646", "ASCII" },
121 /*{ "ARMSCII-8", "ARMSCII-8" },*/
122 /*{ "BIG5", "BIG5" },*/
123 { "Big5-HKSCS", "BIG5-HKSCS" },
124 /*{ "CP1251", "CP1251" },*/
125 /*{ "CP866", "CP866" },*/
126 /*{ "GB18030", "GB18030" },*/
127 /*{ "GB2312", "GB2312" },*/
128 { "ISO8859-1", "ISO-8859-1" },
129 { "ISO8859-13", "ISO-8859-13" },
130 { "ISO8859-15", "ISO-8859-15" },
131 { "ISO8859-2", "ISO-8859-2" },
132 { "ISO8859-4", "ISO-8859-4" },
133 { "ISO8859-5", "ISO-8859-5" },
134 { "ISO8859-7", "ISO-8859-7" },
135 /*{ "KOI8-R", "KOI8-R" },*/
136 /*{ "KOI8-U", "KOI8-U" },*/
137 /*{ "PT154", "PT154" },*/
138 { "SJIS", "SHIFT_JIS" },
139 { "eucCN", "GB2312" },
140 { "eucJP", "EUC-JP" },
141 { "eucKR", "EUC-KR" },
142 { "eucTW", "EUC-TW" }
143# define alias_table_defined
144# endif
145# if defined __OpenBSD__ /* OpenBSD */
146 { "646", "ASCII" },
147 { "ISO8859-1", "ISO-8859-1" },
148 { "ISO8859-13", "ISO-8859-13" },
149 { "ISO8859-15", "ISO-8859-15" },
150 { "ISO8859-2", "ISO-8859-2" },
151 { "ISO8859-4", "ISO-8859-4" },
152 { "ISO8859-5", "ISO-8859-5" },
153 { "ISO8859-7", "ISO-8859-7" }
154# define alias_table_defined
155# endif
156# if defined __APPLE__ && defined __MACH__ /* Mac OS X */
157 /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
158 useless:
159 - It returns the empty string when LANG is set to a locale of the
160 form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
161 LC_CTYPE file.
162 - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
163 the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
164 - The documentation says:
165 "... all code that calls BSD system routines should ensure
166 that the const *char parameters of these routines are in UTF-8
167 encoding. All BSD system functions expect their string
168 parameters to be in UTF-8 encoding and nothing else."
169 It also says
170 "An additional caveat is that string parameters for files,
171 paths, and other file-system entities must be in canonical
172 UTF-8. In a canonical UTF-8 Unicode string, all decomposable
173 characters are decomposed ..."
174 but this is not true: You can pass non-decomposed UTF-8 strings
175 to file system functions, and it is the OS which will convert
176 them to decomposed UTF-8 before accessing the file system.
177 - The Apple Terminal application displays UTF-8 by default.
178 - However, other applications are free to use different encodings:
179 - xterm uses ISO-8859-1 by default.
180 - TextEdit uses MacRoman by default.
181 We prefer UTF-8 over decomposed UTF-8-MAC because one should
182 minimize the use of decomposed Unicode. Unfortunately, through the
183 Darwin file system, decomposed UTF-8 strings are leaked into user
184 space nevertheless.
185 Then there are also the locales with encodings other than US-ASCII
186 and UTF-8. These locales can be occasionally useful to users (e.g.
187 when grepping through ISO-8859-1 encoded text files), when all their
188 file names are in US-ASCII.
189 */
190 { "ARMSCII-8", "ARMSCII-8" },
191 { "Big5", "BIG5" },
192 { "Big5HKSCS", "BIG5-HKSCS" },
193 { "CP1131", "CP1131" },
194 { "CP1251", "CP1251" },
195 { "CP866", "CP866" },
196 { "CP949", "CP949" },
197 { "GB18030", "GB18030" },
198 { "GB2312", "GB2312" },
199 { "GBK", "GBK" },
200 /*{ "ISCII-DEV", "?" },*/
201 { "ISO8859-1", "ISO-8859-1" },
202 { "ISO8859-13", "ISO-8859-13" },
203 { "ISO8859-15", "ISO-8859-15" },
204 { "ISO8859-2", "ISO-8859-2" },
205 { "ISO8859-4", "ISO-8859-4" },
206 { "ISO8859-5", "ISO-8859-5" },
207 { "ISO8859-7", "ISO-8859-7" },
208 { "ISO8859-9", "ISO-8859-9" },
209 { "KOI8-R", "KOI8-R" },
210 { "KOI8-U", "KOI8-U" },
211 { "PT154", "PT154" },
212 { "SJIS", "SHIFT_JIS" },
213 { "eucCN", "GB2312" },
214 { "eucJP", "EUC-JP" },
215 { "eucKR", "EUC-KR" }
216# define alias_table_defined
217# endif
218# if defined _AIX /* AIX */
219 /*{ "GBK", "GBK" },*/
220 { "IBM-1046", "CP1046" },
221 { "IBM-1124", "CP1124" },
222 { "IBM-1129", "CP1129" },
223 { "IBM-1252", "CP1252" },
224 { "IBM-850", "CP850" },
225 { "IBM-856", "CP856" },
226 { "IBM-921", "ISO-8859-13" },
227 { "IBM-922", "CP922" },
228 { "IBM-932", "CP932" },
229 { "IBM-943", "CP943" },
230 { "IBM-eucCN", "GB2312" },
231 { "IBM-eucJP", "EUC-JP" },
232 { "IBM-eucKR", "EUC-KR" },
233 { "IBM-eucTW", "EUC-TW" },
234 { "ISO8859-1", "ISO-8859-1" },
235 { "ISO8859-15", "ISO-8859-15" },
236 { "ISO8859-2", "ISO-8859-2" },
237 { "ISO8859-5", "ISO-8859-5" },
238 { "ISO8859-6", "ISO-8859-6" },
239 { "ISO8859-7", "ISO-8859-7" },
240 { "ISO8859-8", "ISO-8859-8" },
241 { "ISO8859-9", "ISO-8859-9" },
242 { "TIS-620", "TIS-620" },
243 /*{ "UTF-8", "UTF-8" },*/
244 { "big5", "BIG5" }
245# define alias_table_defined
246# endif
247# if defined __hpux /* HP-UX */
248 { "SJIS", "SHIFT_JIS" },
249 { "arabic8", "HP-ARABIC8" },
250 { "big5", "BIG5" },
251 { "cp1251", "CP1251" },
252 { "eucJP", "EUC-JP" },
253 { "eucKR", "EUC-KR" },
254 { "eucTW", "EUC-TW" },
255 { "gb18030", "GB18030" },
256 { "greek8", "HP-GREEK8" },
257 { "hebrew8", "HP-HEBREW8" },
258 { "hkbig5", "BIG5-HKSCS" },
259 { "hp15CN", "GB2312" },
260 { "iso88591", "ISO-8859-1" },
261 { "iso885913", "ISO-8859-13" },
262 { "iso885915", "ISO-8859-15" },
263 { "iso88592", "ISO-8859-2" },
264 { "iso88594", "ISO-8859-4" },
265 { "iso88595", "ISO-8859-5" },
266 { "iso88596", "ISO-8859-6" },
267 { "iso88597", "ISO-8859-7" },
268 { "iso88598", "ISO-8859-8" },
269 { "iso88599", "ISO-8859-9" },
270 { "kana8", "HP-KANA8" },
271 { "koi8r", "KOI8-R" },
272 { "roman8", "HP-ROMAN8" },
273 { "tis620", "TIS-620" },
274 { "turkish8", "HP-TURKISH8" },
275 { "utf8", "UTF-8" }
276# define alias_table_defined
277# endif
278# if defined __sgi /* IRIX */
279 { "ISO8859-1", "ISO-8859-1" },
280 { "ISO8859-15", "ISO-8859-15" },
281 { "ISO8859-2", "ISO-8859-2" },
282 { "ISO8859-5", "ISO-8859-5" },
283 { "ISO8859-7", "ISO-8859-7" },
284 { "ISO8859-9", "ISO-8859-9" },
285 { "eucCN", "GB2312" },
286 { "eucJP", "EUC-JP" },
287 { "eucKR", "EUC-KR" },
288 { "eucTW", "EUC-TW" }
289# define alias_table_defined
290# endif
291# if defined __osf__ /* OSF/1 */
292 /*{ "GBK", "GBK" },*/
293 { "ISO8859-1", "ISO-8859-1" },
294 { "ISO8859-15", "ISO-8859-15" },
295 { "ISO8859-2", "ISO-8859-2" },
296 { "ISO8859-4", "ISO-8859-4" },
297 { "ISO8859-5", "ISO-8859-5" },
298 { "ISO8859-7", "ISO-8859-7" },
299 { "ISO8859-8", "ISO-8859-8" },
300 { "ISO8859-9", "ISO-8859-9" },
301 { "KSC5601", "CP949" },
302 { "SJIS", "SHIFT_JIS" },
303 { "TACTIS", "TIS-620" },
304 /*{ "UTF-8", "UTF-8" },*/
305 { "big5", "BIG5" },
306 { "cp850", "CP850" },
307 { "dechanyu", "DEC-HANYU" },
308 { "dechanzi", "GB2312" },
309 { "deckanji", "DEC-KANJI" },
310 { "deckorean", "EUC-KR" },
311 { "eucJP", "EUC-JP" },
312 { "eucKR", "EUC-KR" },
313 { "eucTW", "EUC-TW" },
314 { "sdeckanji", "EUC-JP" }
315# define alias_table_defined
316# endif
317# if defined __sun /* Solaris */
318 { "5601", "EUC-KR" },
319 { "646", "ASCII" },
320 /*{ "BIG5", "BIG5" },*/
321 { "Big5-HKSCS", "BIG5-HKSCS" },
322 { "GB18030", "GB18030" },
323 /*{ "GBK", "GBK" },*/
324 { "ISO8859-1", "ISO-8859-1" },
325 { "ISO8859-11", "TIS-620" },
326 { "ISO8859-13", "ISO-8859-13" },
327 { "ISO8859-15", "ISO-8859-15" },
328 { "ISO8859-2", "ISO-8859-2" },
329 { "ISO8859-3", "ISO-8859-3" },
330 { "ISO8859-4", "ISO-8859-4" },
331 { "ISO8859-5", "ISO-8859-5" },
332 { "ISO8859-6", "ISO-8859-6" },
333 { "ISO8859-7", "ISO-8859-7" },
334 { "ISO8859-8", "ISO-8859-8" },
335 { "ISO8859-9", "ISO-8859-9" },
336 { "PCK", "SHIFT_JIS" },
337 { "TIS620.2533", "TIS-620" },
338 /*{ "UTF-8", "UTF-8" },*/
339 { "ansi-1251", "CP1251" },
340 { "cns11643", "EUC-TW" },
341 { "eucJP", "EUC-JP" },
342 { "gb2312", "GB2312" },
343 { "koi8-r", "KOI8-R" }
344# define alias_table_defined
345# endif
346# if defined __minix /* Minix */
347 { "646", "ASCII" }
348# define alias_table_defined
349# endif
350# if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */
351 { "CP1361", "JOHAB" },
352 { "CP20127", "ASCII" },
353 { "CP20866", "KOI8-R" },
354 { "CP20936", "GB2312" },
355 { "CP21866", "KOI8-RU" },
356 { "CP28591", "ISO-8859-1" },
357 { "CP28592", "ISO-8859-2" },
358 { "CP28593", "ISO-8859-3" },
359 { "CP28594", "ISO-8859-4" },
360 { "CP28595", "ISO-8859-5" },
361 { "CP28596", "ISO-8859-6" },
362 { "CP28597", "ISO-8859-7" },
363 { "CP28598", "ISO-8859-8" },
364 { "CP28599", "ISO-8859-9" },
365 { "CP28605", "ISO-8859-15" },
366 { "CP38598", "ISO-8859-8" },
367 { "CP51932", "EUC-JP" },
368 { "CP51936", "GB2312" },
369 { "CP51949", "EUC-KR" },
370 { "CP51950", "EUC-TW" },
371 { "CP54936", "GB18030" },
372 { "CP65001", "UTF-8" },
373 { "CP936", "GBK" }
374# define alias_table_defined
375# endif
376# if defined OS2 /* OS/2 */
377 /* The list of encodings is taken from "List of OS/2 Codepages"
378 by Alex Taylor:
379 <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
380 See also "__convcp() of kLIBC":
381 <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */
382 { "CP1004", "CP1252" },
383 /*{ "CP1041", "CP943" },*/
384 /*{ "CP1088", "CP949" },*/
385 { "CP1089", "ISO-8859-6" },
386 /*{ "CP1114", "CP950" },*/
387 /*{ "CP1115", "GB2312" },*/
388 { "CP1208", "UTF-8" },
389 /*{ "CP1380", "GB2312" },*/
390 { "CP1381", "GB2312" },
391 { "CP1383", "GB2312" },
392 { "CP1386", "GBK" },
393 /*{ "CP301", "CP943" },*/
394 { "CP3372", "EUC-JP" },
395 { "CP4946", "CP850" },
396 /*{ "CP5048", "JIS_X0208-1990" },*/
397 /*{ "CP5049", "JIS_X0212-1990" },*/
398 /*{ "CP5067", "KS_C_5601-1987" },*/
399 { "CP813", "ISO-8859-7" },
400 { "CP819", "ISO-8859-1" },
401 { "CP878", "KOI8-R" },
402 /*{ "CP897", "CP943" },*/
403 { "CP912", "ISO-8859-2" },
404 { "CP913", "ISO-8859-3" },
405 { "CP914", "ISO-8859-4" },
406 { "CP915", "ISO-8859-5" },
407 { "CP916", "ISO-8859-8" },
408 { "CP920", "ISO-8859-9" },
409 { "CP921", "ISO-8859-13" },
410 { "CP923", "ISO-8859-15" },
411 /*{ "CP941", "CP943" },*/
412 /*{ "CP947", "CP950" },*/
413 /*{ "CP951", "CP949" },*/
414 /*{ "CP952", "JIS_X0208-1990" },*/
415 /*{ "CP953", "JIS_X0212-1990" },*/
416 { "CP954", "EUC-JP" },
417 { "CP964", "EUC-TW" },
418 { "CP970", "EUC-KR" },
419 /*{ "CP971", "KS_C_5601-1987" },*/
420 { "IBM-1004", "CP1252" },
421 /*{ "IBM-1006", "?" },*/
422 /*{ "IBM-1008", "?" },*/
423 /*{ "IBM-1041", "CP943" },*/
424 /*{ "IBM-1051", "?" },*/
425 /*{ "IBM-1088", "CP949" },*/
426 { "IBM-1089", "ISO-8859-6" },
427 /*{ "IBM-1098", "?" },*/
428 /*{ "IBM-1114", "CP950" },*/
429 /*{ "IBM-1115", "GB2312" },*/
430 /*{ "IBM-1116", "?" },*/
431 /*{ "IBM-1117", "?" },*/
432 /*{ "IBM-1118", "?" },*/
433 /*{ "IBM-1119", "?" },*/
434 { "IBM-1124", "CP1124" },
435 { "IBM-1125", "CP1125" },
436 { "IBM-1131", "CP1131" },
437 { "IBM-1208", "UTF-8" },
438 { "IBM-1250", "CP1250" },
439 { "IBM-1251", "CP1251" },
440 { "IBM-1252", "CP1252" },
441 { "IBM-1253", "CP1253" },
442 { "IBM-1254", "CP1254" },
443 { "IBM-1255", "CP1255" },
444 { "IBM-1256", "CP1256" },
445 { "IBM-1257", "CP1257" },
446 /*{ "IBM-1275", "?" },*/
447 /*{ "IBM-1276", "?" },*/
448 /*{ "IBM-1277", "?" },*/
449 /*{ "IBM-1280", "?" },*/
450 /*{ "IBM-1281", "?" },*/
451 /*{ "IBM-1282", "?" },*/
452 /*{ "IBM-1283", "?" },*/
453 /*{ "IBM-1380", "GB2312" },*/
454 { "IBM-1381", "GB2312" },
455 { "IBM-1383", "GB2312" },
456 { "IBM-1386", "GBK" },
457 /*{ "IBM-301", "CP943" },*/
458 { "IBM-3372", "EUC-JP" },
459 { "IBM-367", "ASCII" },
460 { "IBM-437", "CP437" },
461 { "IBM-4946", "CP850" },
462 /*{ "IBM-5048", "JIS_X0208-1990" },*/
463 /*{ "IBM-5049", "JIS_X0212-1990" },*/
464 /*{ "IBM-5067", "KS_C_5601-1987" },*/
465 { "IBM-813", "ISO-8859-7" },
466 { "IBM-819", "ISO-8859-1" },
467 { "IBM-850", "CP850" },
468 /*{ "IBM-851", "?" },*/
469 { "IBM-852", "CP852" },
470 { "IBM-855", "CP855" },
471 { "IBM-856", "CP856" },
472 { "IBM-857", "CP857" },
473 /*{ "IBM-859", "?" },*/
474 { "IBM-860", "CP860" },
475 { "IBM-861", "CP861" },
476 { "IBM-862", "CP862" },
477 { "IBM-863", "CP863" },
478 { "IBM-864", "CP864" },
479 { "IBM-865", "CP865" },
480 { "IBM-866", "CP866" },
481 /*{ "IBM-868", "?" },*/
482 { "IBM-869", "CP869" },
483 { "IBM-874", "CP874" },
484 { "IBM-878", "KOI8-R" },
485 /*{ "IBM-895", "?" },*/
486 /*{ "IBM-897", "CP943" },*/
487 /*{ "IBM-907", "?" },*/
488 /*{ "IBM-909", "?" },*/
489 { "IBM-912", "ISO-8859-2" },
490 { "IBM-913", "ISO-8859-3" },
491 { "IBM-914", "ISO-8859-4" },
492 { "IBM-915", "ISO-8859-5" },
493 { "IBM-916", "ISO-8859-8" },
494 { "IBM-920", "ISO-8859-9" },
495 { "IBM-921", "ISO-8859-13" },
496 { "IBM-922", "CP922" },
497 { "IBM-923", "ISO-8859-15" },
498 { "IBM-932", "CP932" },
499 /*{ "IBM-941", "CP943" },*/
500 /*{ "IBM-942", "?" },*/
501 { "IBM-943", "CP943" },
502 /*{ "IBM-947", "CP950" },*/
503 { "IBM-949", "CP949" },
504 { "IBM-950", "CP950" },
505 /*{ "IBM-951", "CP949" },*/
506 /*{ "IBM-952", "JIS_X0208-1990" },*/
507 /*{ "IBM-953", "JIS_X0212-1990" },*/
508 { "IBM-954", "EUC-JP" },
509 /*{ "IBM-955", "?" },*/
510 { "IBM-964", "EUC-TW" },
511 { "IBM-970", "EUC-KR" },
512 /*{ "IBM-971", "KS_C_5601-1987" },*/
513 { "IBM-eucCN", "GB2312" },
514 { "IBM-eucJP", "EUC-JP" },
515 { "IBM-eucKR", "EUC-KR" },
516 { "IBM-eucTW", "EUC-TW" },
517 { "IBM33722", "EUC-JP" },
518 { "ISO8859-1", "ISO-8859-1" },
519 { "ISO8859-2", "ISO-8859-2" },
520 { "ISO8859-3", "ISO-8859-3" },
521 { "ISO8859-4", "ISO-8859-4" },
522 { "ISO8859-5", "ISO-8859-5" },
523 { "ISO8859-6", "ISO-8859-6" },
524 { "ISO8859-7", "ISO-8859-7" },
525 { "ISO8859-8", "ISO-8859-8" },
526 { "ISO8859-9", "ISO-8859-9" },
527 /*{ "JISX0201-1976", "JISX0201-1976" },*/
528 /*{ "JISX0208-1978", "?" },*/
529 /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
530 /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
531 /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
532 /*{ "KSC5601-1987", "KS_C_5601-1987" },*/
533 { "SJIS-1", "CP943" },
534 { "SJIS-2", "CP943" },
535 { "eucJP", "EUC-JP" },
536 { "eucKR", "EUC-KR" },
537 { "eucTW-1993", "EUC-TW" }
538# define alias_table_defined
539# endif
540# if defined VMS /* OpenVMS */
541 /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
542 "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
543 section 10.7 "Handling Different Character Sets". */
544 { "DECHANYU", "DEC-HANYU" },
545 { "DECHANZI", "GB2312" },
546 { "DECKANJI", "DEC-KANJI" },
547 { "DECKOREAN", "EUC-KR" },
548 { "ISO8859-1", "ISO-8859-1" },
549 { "ISO8859-2", "ISO-8859-2" },
550 { "ISO8859-5", "ISO-8859-5" },
551 { "ISO8859-7", "ISO-8859-7" },
552 { "ISO8859-8", "ISO-8859-8" },
553 { "ISO8859-9", "ISO-8859-9" },
554 { "SDECKANJI", "EUC-JP" },
555 { "SJIS", "SHIFT_JIS" },
556 { "eucJP", "EUC-JP" },
557 { "eucTW", "EUC-TW" }
558# define alias_table_defined
559# endif
560# ifndef alias_table_defined
561 /* Just a dummy entry, to avoid a C syntax error. */
562 { "", "" }
563# endif
564 };
5e8754f9 565
c0c3707f 566# endif
8690e634
JK
567
568#else
569
c0c3707f
CB
570/* On these platforms, we use a mapping from locale name to GNU canonical
571 encoding name. */
8690e634 572
c0c3707f
CB
/* One row of locale_table: maps a locale name to the GNU canonical name of
   the encoding that locale uses.  Both strings are stored inline in
   fixed-size arrays: a locale name may be at most 17 characters long and a
   canonical name at most 11 (each plus the terminating NUL).  The table
   initializers are positional, so the field order must not change.  */
573struct table_entry
574{
/* Locale name; the lookup key.  */
575 const char locale[17+1];
/* GNU canonical encoding name used by LOCALE.  */
576 const char canonical[11+1];
577};
578
579/* Table of platform-dependent mappings from locale name ('locale') to the
   GNU canonical encoding name ('canonical').  It is compiled only when
   HAVE_LANGINFO_CODESET, WINDOWS_NATIVE and OS2 are all unset.
   Invariants to keep when editing:
   - Within each platform section the entries are sorted by 'locale' in
     ascending byte order (so "C" precedes "ar", and "zh_TW.BIG5" precedes
     "zh_TW.Big5").
     NOTE(review): the code that searches this table is not in view here;
     presumably it depends on this order -- confirm before reordering.
   - Locale names must fit in 17 characters and canonical names in 11.
   - The last entry of a section carries no trailing comma, and every
     section defines locale_table_defined; when no section applies, the
     dummy entry at the end keeps the initializer syntactically valid.  */
580static const struct table_entry locale_table[] =
581 {
582# if defined __FreeBSD__ /* FreeBSD 4.2 */
583 { "cs_CZ.ISO_8859-2", "ISO-8859-2" },
584 { "da_DK.DIS_8859-15", "ISO-8859-15" },
585 { "da_DK.ISO_8859-1", "ISO-8859-1" },
586 { "de_AT.DIS_8859-15", "ISO-8859-15" },
587 { "de_AT.ISO_8859-1", "ISO-8859-1" },
588 { "de_CH.DIS_8859-15", "ISO-8859-15" },
589 { "de_CH.ISO_8859-1", "ISO-8859-1" },
590 { "de_DE.DIS_8859-15", "ISO-8859-15" },
591 { "de_DE.ISO_8859-1", "ISO-8859-1" },
592 { "en_AU.DIS_8859-15", "ISO-8859-15" },
593 { "en_AU.ISO_8859-1", "ISO-8859-1" },
594 { "en_CA.DIS_8859-15", "ISO-8859-15" },
595 { "en_CA.ISO_8859-1", "ISO-8859-1" },
596 { "en_GB.DIS_8859-15", "ISO-8859-15" },
597 { "en_GB.ISO_8859-1", "ISO-8859-1" },
598 { "en_US.DIS_8859-15", "ISO-8859-15" },
599 { "en_US.ISO_8859-1", "ISO-8859-1" },
600 { "es_ES.DIS_8859-15", "ISO-8859-15" },
601 { "es_ES.ISO_8859-1", "ISO-8859-1" },
602 { "fi_FI.DIS_8859-15", "ISO-8859-15" },
603 { "fi_FI.ISO_8859-1", "ISO-8859-1" },
604 { "fr_BE.DIS_8859-15", "ISO-8859-15" },
605 { "fr_BE.ISO_8859-1", "ISO-8859-1" },
606 { "fr_CA.DIS_8859-15", "ISO-8859-15" },
607 { "fr_CA.ISO_8859-1", "ISO-8859-1" },
608 { "fr_CH.DIS_8859-15", "ISO-8859-15" },
609 { "fr_CH.ISO_8859-1", "ISO-8859-1" },
610 { "fr_FR.DIS_8859-15", "ISO-8859-15" },
611 { "fr_FR.ISO_8859-1", "ISO-8859-1" },
612 { "hr_HR.ISO_8859-2", "ISO-8859-2" },
613 { "hu_HU.ISO_8859-2", "ISO-8859-2" },
614 { "is_IS.DIS_8859-15", "ISO-8859-15" },
615 { "is_IS.ISO_8859-1", "ISO-8859-1" },
616 { "it_CH.DIS_8859-15", "ISO-8859-15" },
617 { "it_CH.ISO_8859-1", "ISO-8859-1" },
618 { "it_IT.DIS_8859-15", "ISO-8859-15" },
619 { "it_IT.ISO_8859-1", "ISO-8859-1" },
620 { "ja_JP.EUC", "EUC-JP" },
621 { "ja_JP.SJIS", "SHIFT_JIS" },
622 { "ja_JP.Shift_JIS", "SHIFT_JIS" },
623 { "ko_KR.EUC", "EUC-KR" },
624 { "la_LN.ASCII", "ASCII" },
625 { "la_LN.DIS_8859-15", "ISO-8859-15" },
626 { "la_LN.ISO_8859-1", "ISO-8859-1" },
627 { "la_LN.ISO_8859-2", "ISO-8859-2" },
628 { "la_LN.ISO_8859-4", "ISO-8859-4" },
629 { "lt_LN.ASCII", "ASCII" },
630 { "lt_LN.DIS_8859-15", "ISO-8859-15" },
631 { "lt_LN.ISO_8859-1", "ISO-8859-1" },
632 { "lt_LN.ISO_8859-2", "ISO-8859-2" },
633 { "lt_LT.ISO_8859-4", "ISO-8859-4" },
634 { "nl_BE.DIS_8859-15", "ISO-8859-15" },
635 { "nl_BE.ISO_8859-1", "ISO-8859-1" },
636 { "nl_NL.DIS_8859-15", "ISO-8859-15" },
637 { "nl_NL.ISO_8859-1", "ISO-8859-1" },
638 { "no_NO.DIS_8859-15", "ISO-8859-15" },
639 { "no_NO.ISO_8859-1", "ISO-8859-1" },
640 { "pl_PL.ISO_8859-2", "ISO-8859-2" },
641 { "pt_PT.DIS_8859-15", "ISO-8859-15" },
642 { "pt_PT.ISO_8859-1", "ISO-8859-1" },
643 { "ru_RU.CP866", "CP866" },
644 { "ru_RU.ISO_8859-5", "ISO-8859-5" },
645 { "ru_RU.KOI8-R", "KOI8-R" },
646 { "ru_SU.CP866", "CP866" },
647 { "ru_SU.ISO_8859-5", "ISO-8859-5" },
648 { "ru_SU.KOI8-R", "KOI8-R" },
649 { "sl_SI.ISO_8859-2", "ISO-8859-2" },
650 { "sv_SE.DIS_8859-15", "ISO-8859-15" },
651 { "sv_SE.ISO_8859-1", "ISO-8859-1" },
652 { "uk_UA.KOI8-U", "KOI8-U" },
653 { "zh_CN.EUC", "GB2312" },
654 { "zh_TW.BIG5", "BIG5" },
655 { "zh_TW.Big5", "BIG5" }
656# define locale_table_defined
8690e634 657# endif
c0c3707f
CB
658# if defined __DJGPP__ /* DOS / DJGPP 2.03 */
659 /* The encodings given here may not all be correct.
660 If you find that the encoding given for your language and
661 country is not the one your DOS machine actually uses, just
662 correct it in this file, and send a mail to
663 Juan Manuel Guerrero <juan.guerrero@gmx.de>
664 and <bug-gnulib@gnu.org>. */
665 { "C", "ASCII" },
666 { "ar", "CP864" },
667 { "ar_AE", "CP864" },
668 { "ar_DZ", "CP864" },
669 { "ar_EG", "CP864" },
670 { "ar_IQ", "CP864" },
671 { "ar_IR", "CP864" },
672 { "ar_JO", "CP864" },
673 { "ar_KW", "CP864" },
674 { "ar_MA", "CP864" },
675 { "ar_OM", "CP864" },
676 { "ar_QA", "CP864" },
677 { "ar_SA", "CP864" },
678 { "ar_SY", "CP864" },
679 { "be", "CP866" },
680 { "be_BE", "CP866" },
681 { "bg", "CP866" }, /* not CP855 ?? */
682 { "bg_BG", "CP866" }, /* not CP855 ?? */
683 { "ca", "CP850" },
684 { "ca_ES", "CP850" },
685 { "cs", "CP852" },
686 { "cs_CZ", "CP852" },
687 { "da", "CP865" }, /* not CP850 ?? */
688 { "da_DK", "CP865" }, /* not CP850 ?? */
689 { "de", "CP850" },
690 { "de_AT", "CP850" },
691 { "de_CH", "CP850" },
692 { "de_DE", "CP850" },
693 { "el", "CP869" },
694 { "el_GR", "CP869" },
695 { "en", "CP850" },
696 { "en_AU", "CP850" }, /* not CP437 ?? */
697 { "en_CA", "CP850" },
698 { "en_GB", "CP850" },
699 { "en_NZ", "CP437" },
700 { "en_US", "CP437" },
701 { "en_ZA", "CP850" }, /* not CP437 ?? */
702 { "eo", "CP850" },
703 { "eo_EO", "CP850" },
704 { "es", "CP850" },
705 { "es_AR", "CP850" },
706 { "es_BO", "CP850" },
707 { "es_CL", "CP850" },
708 { "es_CO", "CP850" },
709 { "es_CR", "CP850" },
710 { "es_CU", "CP850" },
711 { "es_DO", "CP850" },
712 { "es_EC", "CP850" },
713 { "es_ES", "CP850" },
714 { "es_GT", "CP850" },
715 { "es_HN", "CP850" },
716 { "es_MX", "CP850" },
717 { "es_NI", "CP850" },
718 { "es_PA", "CP850" },
719 { "es_PE", "CP850" },
720 { "es_PY", "CP850" },
721 { "es_SV", "CP850" },
722 { "es_UY", "CP850" },
723 { "es_VE", "CP850" },
724 { "et", "CP850" },
725 { "et_EE", "CP850" },
726 { "eu", "CP850" },
727 { "eu_ES", "CP850" },
728 { "fi", "CP850" },
729 { "fi_FI", "CP850" },
730 { "fr", "CP850" },
731 { "fr_BE", "CP850" },
732 { "fr_CA", "CP850" },
733 { "fr_CH", "CP850" },
734 { "fr_FR", "CP850" },
735 { "ga", "CP850" },
736 { "ga_IE", "CP850" },
737 { "gd", "CP850" },
738 { "gd_GB", "CP850" },
739 { "gl", "CP850" },
740 { "gl_ES", "CP850" },
741 { "he", "CP862" },
742 { "he_IL", "CP862" },
743 { "hr", "CP852" },
744 { "hr_HR", "CP852" },
745 { "hu", "CP852" },
746 { "hu_HU", "CP852" },
747 { "id", "CP850" }, /* not CP437 ?? */
748 { "id_ID", "CP850" }, /* not CP437 ?? */
749 { "is", "CP861" }, /* not CP850 ?? */
750 { "is_IS", "CP861" }, /* not CP850 ?? */
751 { "it", "CP850" },
752 { "it_CH", "CP850" },
753 { "it_IT", "CP850" },
754 { "ja", "CP932" },
755 { "ja_JP", "CP932" },
756 { "kr", "CP949" }, /* not CP934 ?? */
757 { "kr_KR", "CP949" }, /* not CP934 ?? */
758 { "lt", "CP775" },
759 { "lt_LT", "CP775" },
760 { "lv", "CP775" },
761 { "lv_LV", "CP775" },
762 { "mk", "CP866" }, /* not CP855 ?? */
763 { "mk_MK", "CP866" }, /* not CP855 ?? */
764 { "mt", "CP850" },
765 { "mt_MT", "CP850" },
766 { "nb", "CP865" }, /* not CP850 ?? */
767 { "nb_NO", "CP865" }, /* not CP850 ?? */
768 { "nl", "CP850" },
769 { "nl_BE", "CP850" },
770 { "nl_NL", "CP850" },
771 { "nn", "CP865" }, /* not CP850 ?? */
772 { "nn_NO", "CP865" }, /* not CP850 ?? */
773 { "no", "CP865" }, /* not CP850 ?? */
774 { "no_NO", "CP865" }, /* not CP850 ?? */
775 { "pl", "CP852" },
776 { "pl_PL", "CP852" },
777 { "pt", "CP850" },
778 { "pt_BR", "CP850" },
779 { "pt_PT", "CP850" },
780 { "ro", "CP852" },
781 { "ro_RO", "CP852" },
782 { "ru", "CP866" },
783 { "ru_RU", "CP866" },
784 { "sk", "CP852" },
785 { "sk_SK", "CP852" },
786 { "sl", "CP852" },
787 { "sl_SI", "CP852" },
788 { "sq", "CP852" },
789 { "sq_AL", "CP852" },
790 { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */
791 { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
792 { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
793 { "sv", "CP850" },
794 { "sv_SE", "CP850" },
795 { "th", "CP874" },
796 { "th_TH", "CP874" },
797 { "tr", "CP857" },
798 { "tr_TR", "CP857" },
799 { "uk", "CP1125" },
800 { "uk_UA", "CP1125" },
801 { "zh_CN", "GBK" },
802 { "zh_TW", "CP950" } /* not CP938 ?? */
803# define locale_table_defined
8690e634 804# endif
c0c3707f
CB
805# ifndef locale_table_defined
806 /* Just a dummy entry, to avoid a C syntax error. */
807 { "", "" }
4a626d0a 808# endif
c0c3707f 809 };
8690e634 810
c0c3707f 811#endif
5e8754f9 812
8690e634
JK
813
814/* Determine the current locale's character encoding, and canonicalize it
c0c3707f 815 into one of the canonical names listed in localcharset.h.
8690e634
JK
816 The result must not be freed; it is statically allocated.
817 If the canonical name cannot be determined, the result is a non-canonical
818 name. */
819
820#ifdef STATIC
821STATIC
822#endif
823const char *
824locale_charset (void)
825{
826 const char *codeset;
8690e634 827
c0c3707f 828#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
8690e634
JK
829
830# if HAVE_LANGINFO_CODESET
831
832 /* Most systems support nl_langinfo (CODESET) nowadays. */
833 codeset = nl_langinfo (CODESET);
834
835# ifdef __CYGWIN__
836 /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always
837 returns "US-ASCII". Return the suffix of the locale name from the
838 environment variables (if present) or the codepage as a number. */
839 if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
840 {
841 const char *locale;
842 static char buf[2 + 10 + 1];
843
844 locale = getenv ("LC_ALL");
845 if (locale == NULL || locale[0] == '\0')
846 {
847 locale = getenv ("LC_CTYPE");
848 if (locale == NULL || locale[0] == '\0')
849 locale = getenv ("LANG");
850 }
851 if (locale != NULL && locale[0] != '\0')
852 {
853 /* If the locale name contains an encoding after the dot, return
854 it. */
855 const char *dot = strchr (locale, '.');
856
857 if (dot != NULL)
858 {
859 const char *modifier;
860
861 dot++;
862 /* Look for the possible @... trailer and remove it, if any. */
863 modifier = strchr (dot, '@');
864 if (modifier == NULL)
865 return dot;
866 if (modifier - dot < sizeof (buf))
867 {
868 memcpy (buf, dot, modifier - dot);
869 buf [modifier - dot] = '\0';
870 return buf;
871 }
872 }
873 }
874
875 /* The Windows API has a function returning the locale's codepage as a
876 number: GetACP(). This encoding is used by Cygwin, unless the user
877 has set the environment variable CYGWIN=codepage:oem (which very few
878 people do).
879 Output directed to console windows needs to be converted (to
880 GetOEMCP() if the console is using a raster font, or to
881 GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
882 this conversion transparently (see winsup/cygwin/fhandler_console.cc),
883 converting to GetConsoleOutputCP(). This leads to correct results,
884 except when SetConsoleOutputCP has been called and a raster font is
885 in use. */
886 sprintf (buf, "CP%u", GetACP ());
887 codeset = buf;
888 }
889# endif
890
c0c3707f
CB
891 if (codeset == NULL)
892 /* The canonical name cannot be determined. */
893 codeset = "";
5e8754f9 894
c0c3707f 895# elif defined WINDOWS_NATIVE
8690e634
JK
896
897 static char buf[2 + 10 + 1];
898
4a626d0a
PA
899 /* The Windows API has a function returning the locale's codepage as
900 a number, but the value doesn't change according to what the
901 'setlocale' call specified. So we use it as a last resort, in
902 case the string returned by 'setlocale' doesn't specify the
903 codepage. */
904 char *current_locale = setlocale (LC_ALL, NULL);
905 char *pdot;
906
907 /* If they set different locales for different categories,
908 'setlocale' will return a semi-colon separated list of locale
909 values. To make sure we use the correct one, we choose LC_CTYPE. */
910 if (strchr (current_locale, ';'))
911 current_locale = setlocale (LC_CTYPE, NULL);
912
913 pdot = strrchr (current_locale, '.');
c0c3707f 914 if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
4a626d0a
PA
915 sprintf (buf, "CP%s", pdot + 1);
916 else
917 {
918 /* The Windows API has a function returning the locale's codepage as a
919 number: GetACP().
920 When the output goes to a console window, it needs to be provided in
921 GetOEMCP() encoding if the console is using a raster font, or in
922 GetConsoleOutputCP() encoding if it is using a TrueType font.
923 But in GUI programs and for output sent to files and pipes, GetACP()
924 encoding is the best bet. */
925 sprintf (buf, "CP%u", GetACP ());
926 }
c0c3707f
CB
927 /* For a locale name such as "French_France.65001", in Windows 10,
928 setlocale now returns "French_France.utf8" instead. */
929 if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
930 codeset = "UTF-8";
931 else
932 codeset = buf;
8690e634 933
c0c3707f 934# elif defined OS2
8690e634
JK
935
936 const char *locale;
937 static char buf[2 + 10 + 1];
938 ULONG cp[3];
939 ULONG cplen;
940
4a626d0a
PA
941 codeset = NULL;
942
8690e634
JK
943 /* Allow user to override the codeset, as set in the operating system,
944 with standard language environment variables. */
945 locale = getenv ("LC_ALL");
946 if (locale == NULL || locale[0] == '\0')
947 {
948 locale = getenv ("LC_CTYPE");
949 if (locale == NULL || locale[0] == '\0')
950 locale = getenv ("LANG");
951 }
952 if (locale != NULL && locale[0] != '\0')
953 {
954 /* If the locale name contains an encoding after the dot, return it. */
955 const char *dot = strchr (locale, '.');
956
957 if (dot != NULL)
958 {
959 const char *modifier;
960
961 dot++;
962 /* Look for the possible @... trailer and remove it, if any. */
963 modifier = strchr (dot, '@');
964 if (modifier == NULL)
965 return dot;
966 if (modifier - dot < sizeof (buf))
967 {
968 memcpy (buf, dot, modifier - dot);
969 buf [modifier - dot] = '\0';
970 return buf;
971 }
972 }
973
4a626d0a
PA
974 /* For the POSIX locale, don't use the system's codepage. */
975 if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
976 codeset = "";
8690e634 977 }
4a626d0a
PA
978
979 if (codeset == NULL)
8690e634
JK
980 {
981 /* OS/2 has a function returning the locale's codepage as a number. */
982 if (DosQueryCp (sizeof (cp), cp, &cplen))
983 codeset = "";
984 else
985 {
986 sprintf (buf, "CP%u", cp[0]);
987 codeset = buf;
988 }
989 }
990
c0c3707f 991# else
7a6dbc2f 992
c0c3707f 993# error "Add code for other platforms here."
7a6dbc2f 994
c0c3707f
CB
995# endif
996
997 /* Resolve alias. */
998 {
999# ifdef alias_table_defined
1000 /* On some platforms, UTF-8 locales are the most frequently used ones.
1001 Speed up the common case and slow down the less common cases by
1002 testing for this case first. */
1003# if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
1004 if (strcmp (codeset, "UTF-8") == 0)
1005 goto done_table_lookup;
1006 else
1007# endif
7a6dbc2f 1008 {
c0c3707f
CB
1009 const struct table_entry * const table = alias_table;
1010 size_t const table_size =
1011 sizeof (alias_table) / sizeof (struct table_entry);
1012 /* The table is sorted. Perform a binary search. */
1013 size_t hi = table_size;
1014 size_t lo = 0;
1015 while (lo < hi)
1016 {
1017 /* Invariant:
1018 for i < lo, strcmp (table[i].alias, codeset) < 0,
1019 for i >= hi, strcmp (table[i].alias, codeset) > 0. */
1020 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1021 int cmp = strcmp (table[mid].alias, codeset);
1022 if (cmp < 0)
1023 lo = mid + 1;
1024 else if (cmp > 0)
1025 hi = mid;
1026 else
1027 {
1028 /* Found an i with
1029 strcmp (table[i].alias, codeset) == 0. */
1030 codeset = table[mid].canonical;
1031 goto done_table_lookup;
1032 }
1033 }
7a6dbc2f 1034 }
c0c3707f
CB
1035 if (0)
1036 done_table_lookup: ;
1037 else
1038# endif
1039 {
1040 /* Did not find it in the table. */
1041 /* On Mac OS X, all modern locales use the UTF-8 encoding.
1042 BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1043# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1044 codeset = "UTF-8";
1045# else
1046 /* Don't return an empty string. GNU libc and GNU libiconv interpret
1047 the empty string as denoting "the locale's character encoding",
1048 thus GNU libiconv would call this function a second time. */
1049 if (codeset[0] == '\0')
1050 codeset = "ASCII";
1051# endif
1052 }
1053 }
7a6dbc2f 1054
c0c3707f
CB
1055#else
1056
1057 /* On old systems which lack it, use setlocale or getenv. */
1058 const char *locale = NULL;
1059
1060 /* But most old systems don't have a complete set of locales. Some
1061 (like DJGPP) have only the C locale. Therefore we don't use setlocale
1062 here; it would return "C" when it doesn't support the locale name the
1063 user has set. */
1064# if 0
1065 locale = setlocale (LC_CTYPE, NULL);
1066# endif
1067 if (locale == NULL || locale[0] == '\0')
1068 {
1069 locale = getenv ("LC_ALL");
1070 if (locale == NULL || locale[0] == '\0')
1071 {
1072 locale = getenv ("LC_CTYPE");
1073 if (locale == NULL || locale[0] == '\0')
1074 locale = getenv ("LANG");
1075 if (locale == NULL)
1076 locale = "";
1077 }
1078 }
1079
1080 /* Map locale name to canonical encoding name. */
1081 {
1082# ifdef locale_table_defined
1083 const struct table_entry * const table = locale_table;
1084 size_t const table_size =
1085 sizeof (locale_table) / sizeof (struct table_entry);
1086 /* The table is sorted. Perform a binary search. */
1087 size_t hi = table_size;
1088 size_t lo = 0;
1089 while (lo < hi)
1090 {
1091 /* Invariant:
1092 for i < lo, strcmp (table[i].locale, locale) < 0,
1093 for i >= hi, strcmp (table[i].locale, locale) > 0. */
1094 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1095 int cmp = strcmp (table[mid].locale, locale);
1096 if (cmp < 0)
1097 lo = mid + 1;
1098 else if (cmp > 0)
1099 hi = mid;
1100 else
1101 {
1102 /* Found an i with
1103 strcmp (table[i].locale, locale) == 0. */
1104 codeset = table[mid].canonical;
1105 goto done_table_lookup;
1106 }
1107 }
1108 if (0)
1109 done_table_lookup: ;
1110 else
1111# endif
1112 {
1113 /* Did not find it in the table. */
1114 /* On Mac OS X, all modern locales use the UTF-8 encoding.
1115 BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1116# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1117 codeset = "UTF-8";
1118# else
1119 /* The canonical name cannot be determined. */
1120 /* Don't return an empty string. GNU libc and GNU libiconv interpret
1121 the empty string as denoting "the locale's character encoding",
1122 thus GNU libiconv would call this function a second time. */
1123 codeset = "ASCII";
1124# endif
1125 }
1126 }
1127
1128#endif
8690e634 1129
a512b375
JB
1130#ifdef DARWIN7
1131 /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
1132 (the default codeset) does not work when MB_CUR_MAX is 1. */
4a626d0a 1133 if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
a512b375
JB
1134 codeset = "ASCII";
1135#endif
1136
8690e634
JK
1137 return codeset;
1138}
This page took 0.635249 seconds and 4 git commands to generate.