* pex-unix.c (pex_unix_exec_child): Save/restore environ.
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
0fb0cc75 3 Copyright (C) 2001, 2003, 2007, 2008, 2009 Free Software Foundation, Inc.
234b45d4
KB
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
a9762ec7 9 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
a9762ec7 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
19
20#include "defs.h"
21#include "charset.h"
22#include "gdbcmd.h"
23#include "gdb_assert.h"
6c7a06a3 24#include "gdb_obstack.h"
732f6a93 25#include "gdb_wait.h"
6c7a06a3
TT
26#include "charset-list.h"
27#include "vec.h"
234b45d4
KB
28
29#include <stddef.h>
4ef3f3be 30#include "gdb_string.h"
234b45d4
KB
31#include <ctype.h>
32
234b45d4
KB
33\f
34/* How GDB's character set support works
35
6c7a06a3 36 GDB has three global settings:
234b45d4
KB
37
38 - The `current host character set' is the character set GDB should
39 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
40 terminal knows how to display properly. Most users should not
41 change this.
234b45d4
KB
42
43 - The `current target character set' is the character set the
44 program being debugged uses.
45
6c7a06a3
TT
46 - The `current target wide character set' is the wide character set
47 the program being debugged uses, that is, the encoding used for
48 wchar_t.
49
234b45d4
KB
50 There are commands to set each of these, and mechanisms for
51 choosing reasonable default values. GDB has a global list of
52 character sets that it can use as its host or target character
53 sets.
54
55 The header file `charset.h' declares various functions that
56 different pieces of GDB need to perform tasks like:
57
58 - printing target strings and characters to the user's terminal
59 (mostly target->host conversions),
60
61 - building target-appropriate representations of strings and
62 characters the user enters in expressions (mostly host->target
63 conversions),
64
6c7a06a3
TT
65 and so on.
66
67 To avoid excessive code duplication and maintenance efforts,
68 GDB simply requires a capable iconv function. Users on platforms
69 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
70
71\f
6c7a06a3 72#ifdef PHONY_ICONV
234b45d4 73
6c7a06a3
TT
74/* Provide a phony iconv that does as little as possible. Also,
75 arrange for there to be a single available character set. */
234b45d4 76
6c7a06a3
TT
77#undef GDB_DEFAULT_HOST_CHARSET
78#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
79#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
80#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
81#undef DEFAULT_CHARSET_NAMES
82#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
83
84#undef iconv_t
85#define iconv_t int
86#undef iconv_open
87#undef iconv
88#undef iconv_close
89
0dd7fb99
TT
90#undef ICONV_CONST
91#define ICONV_CONST const
92
76208fec 93/* Some systems don't have EILSEQ, so we define it here, but not as
e726d784
EZ
94 EINVAL, because callers of `iconv' want to distinguish EINVAL and
95 EILSEQ. This is what iconv.h from libiconv does as well. Note
96 that wchar.h may also define EILSEQ, so this needs to be after we
97 include wchar.h, which happens in defs.h through gdb_wchar.h. */
98#ifndef EILSEQ
99#define EILSEQ ENOENT
100#endif
101
6c7a06a3
TT
102iconv_t
103iconv_open (const char *to, const char *from)
104{
b8899f2b 105 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
6c7a06a3 106 We allow conversions to wchar_t and the host charset. */
b8899f2b 107 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
6c7a06a3
TT
108 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
109 return -1;
110 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
111 return -1;
234b45d4 112
b8899f2b 113 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
6c7a06a3 114 used as a flag in calls to iconv. */
b8899f2b 115 return !strcmp (from, "UTF-32BE");
6c7a06a3 116}
234b45d4 117
6c7a06a3
TT
118int
119iconv_close (iconv_t arg)
120{
121 return 0;
122}
234b45d4 123
6c7a06a3 124size_t
b8899f2b 125iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
6c7a06a3
TT
126 char **outbuf, size_t *outbytesleft)
127{
b8899f2b 128 if (utf_flag)
6c7a06a3
TT
129 {
130 while (*inbytesleft >= 4)
131 {
132 size_t j;
133 unsigned long c = 0;
134
135 for (j = 0; j < 4; ++j)
136 {
137 c <<= 8;
138 c += (*inbuf)[j] & 0xff;
139 }
140
141 if (c >= 256)
142 {
143 errno = EILSEQ;
144 return -1;
145 }
146 **outbuf = c & 0xff;
147 ++*outbuf;
148 --*outbytesleft;
149
150 ++*inbuf;
151 *inbytesleft -= 4;
152 }
153 if (*inbytesleft < 4)
154 {
155 errno = EINVAL;
156 return -1;
157 }
158 }
159 else
160 {
161 /* In all other cases we simply copy input bytes to the
162 output. */
163 size_t amt = *inbytesleft;
164 if (amt > *outbytesleft)
165 amt = *outbytesleft;
166 memcpy (*outbuf, *inbuf, amt);
167 *inbuf += amt;
168 *outbuf += amt;
169 *inbytesleft -= amt;
170 *outbytesleft -= amt;
171 }
234b45d4 172
6c7a06a3
TT
173 if (*inbytesleft)
174 {
175 errno = E2BIG;
176 return -1;
177 }
234b45d4 178
6c7a06a3
TT
179 /* The number of non-reversible conversions -- but they were all
180 reversible. */
181 return 0;
182}
234b45d4 183
6c7a06a3 184#endif
234b45d4
KB
185
186
187\f
188/* The global lists of character sets and translations. */
189
190
e33d66ec
EZ
191#ifndef GDB_DEFAULT_TARGET_CHARSET
192#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
193#endif
194
6c7a06a3 195#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 196#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
197#endif
198
199static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
200static const char *host_charset_name = "auto";
920d2a44
AC
201static void
202show_host_charset_name (struct ui_file *file, int from_tty,
203 struct cmd_list_element *c,
204 const char *value)
205{
6c7a06a3
TT
206 if (!strcmp (value, "auto"))
207 fprintf_filtered (file,
208 _("The host character set is \"auto; currently %s\".\n"),
209 auto_host_charset_name);
210 else
211 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
212}
213
e33d66ec 214static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
920d2a44
AC
215static void
216show_target_charset_name (struct ui_file *file, int from_tty,
217 struct cmd_list_element *c, const char *value)
218{
219 fprintf_filtered (file, _("The target character set is \"%s\".\n"),
220 value);
221}
222
6c7a06a3
TT
223static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;
224static void
225show_target_wide_charset_name (struct ui_file *file, int from_tty,
226 struct cmd_list_element *c, const char *value)
e33d66ec 227{
6c7a06a3
TT
228 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
229 value);
230}
e33d66ec 231
6c7a06a3 232static const char *default_charset_names[] =
e33d66ec 233{
6c7a06a3 234 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
235 0
236};
234b45d4 237
6c7a06a3 238static const char **charset_enum;
234b45d4 239
6c7a06a3
TT
240\f
241/* If the target wide character set has big- or little-endian
242 variants, these are the corresponding names. */
243static const char *target_wide_charset_be_name;
244static const char *target_wide_charset_le_name;
234b45d4 245
6c7a06a3
TT
246/* A helper function for validate which sets the target wide big- and
247 little-endian character set names, if possible. */
234b45d4 248
6c7a06a3
TT
249static void
250set_be_le_names (void)
234b45d4 251{
6c7a06a3 252 int i, len;
234b45d4 253
6c7a06a3
TT
254 target_wide_charset_le_name = NULL;
255 target_wide_charset_be_name = NULL;
234b45d4 256
6c7a06a3
TT
257 len = strlen (target_wide_charset_name);
258 for (i = 0; charset_enum[i]; ++i)
259 {
260 if (strncmp (target_wide_charset_name, charset_enum[i], len))
261 continue;
262 if ((charset_enum[i][len] == 'B'
263 || charset_enum[i][len] == 'L')
264 && charset_enum[i][len + 1] == 'E'
265 && charset_enum[i][len + 2] == '\0')
266 {
267 if (charset_enum[i][len] == 'B')
268 target_wide_charset_be_name = charset_enum[i];
269 else
270 target_wide_charset_le_name = charset_enum[i];
271 }
272 }
234b45d4
KB
273}
274
6c7a06a3
TT
275/* 'Set charset', 'set host-charset', 'set target-charset', 'set
276 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
277
278static void
6c7a06a3 279validate (void)
234b45d4 280{
6c7a06a3
TT
281 iconv_t desc;
282 const char *host_cset = host_charset ();
234b45d4 283
6c7a06a3
TT
284 desc = iconv_open (target_wide_charset_name, host_cset);
285 if (desc == (iconv_t) -1)
286 error ("Cannot convert between character sets `%s' and `%s'",
287 target_wide_charset_name, host_cset);
288 iconv_close (desc);
234b45d4 289
6c7a06a3
TT
290 desc = iconv_open (target_charset_name, host_cset);
291 if (desc == (iconv_t) -1)
292 error ("Cannot convert between character sets `%s' and `%s'",
293 target_charset_name, host_cset);
294 iconv_close (desc);
234b45d4 295
6c7a06a3 296 set_be_le_names ();
234b45d4
KB
297}
298
6c7a06a3
TT
299/* This is the sfunc for the 'set charset' command. */
300static void
301set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 302{
6c7a06a3
TT
303 /* CAREFUL: set the target charset here as well. */
304 target_charset_name = host_charset_name;
305 validate ();
234b45d4
KB
306}
307
6c7a06a3
TT
/* sfunc for 'set host-charset'.  A thin wrapper that gives
   validate the signature an sfunc must have.  */

static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  validate ();
}
316
6c7a06a3
TT
/* sfunc for 'set target-charset'; simply validates the settings.  */

static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  validate ();
}
324
6c7a06a3
TT
/* sfunc for 'set target-wide-charset'; simply validates the
   settings.  */

static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  validate ();
}
332
6c7a06a3
TT
333/* sfunc for the 'show charset' command. */
334static void
335show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
336 const char *name)
234b45d4 337{
6c7a06a3
TT
338 show_host_charset_name (file, from_tty, c, host_charset_name);
339 show_target_charset_name (file, from_tty, c, target_charset_name);
340 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
234b45d4
KB
341}
342
234b45d4 343\f
6c7a06a3 344/* Accessor functions. */
234b45d4 345
6c7a06a3
TT
346const char *
347host_charset (void)
234b45d4 348{
6c7a06a3
TT
349 if (!strcmp (host_charset_name, "auto"))
350 return auto_host_charset_name;
351 return host_charset_name;
234b45d4
KB
352}
353
6c7a06a3
TT
354const char *
355target_charset (void)
234b45d4 356{
6c7a06a3 357 return target_charset_name;
234b45d4 358}
234b45d4 359
6c7a06a3 360const char *
e17a4113 361target_wide_charset (enum bfd_endian byte_order)
234b45d4 362{
e17a4113 363 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 364 {
6c7a06a3
TT
365 if (target_wide_charset_be_name)
366 return target_wide_charset_be_name;
234b45d4 367 }
6c7a06a3 368 else
234b45d4 369 {
6c7a06a3
TT
370 if (target_wide_charset_le_name)
371 return target_wide_charset_le_name;
234b45d4
KB
372 }
373
6c7a06a3 374 return target_wide_charset_name;
234b45d4
KB
375}
376
234b45d4 377\f
6c7a06a3
TT
378/* Host character set management. For the time being, we assume that
379 the host character set is some superset of ASCII. */
234b45d4 380
6c7a06a3
TT
/* Map the host character C to a control character: '?' yields
   0177, and any other character is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
388
6c7a06a3
TT
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything that is not a decimal digit
   or a letter in a-f / A-F trips the assertion.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for values
     representable as unsigned char (or EOF), so never hand them a
     possibly negative plain char.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
402
234b45d4 403\f
6c7a06a3 404/* Public character management functions. */
234b45d4 405
6c7a06a3 406/* A cleanup function which is run to close an iconv descriptor. */
234b45d4 407
6c7a06a3
TT
408static void
409cleanup_iconv (void *p)
234b45d4 410{
6c7a06a3
TT
411 iconv_t *descp = p;
412 iconv_close (*descp);
234b45d4
KB
413}
414
6c7a06a3
TT
415void
416convert_between_encodings (const char *from, const char *to,
417 const gdb_byte *bytes, unsigned int num_bytes,
418 int width, struct obstack *output,
419 enum transliterations translit)
420{
421 iconv_t desc;
422 struct cleanup *cleanups;
423 size_t inleft;
424 char *inp;
425 unsigned int space_request;
426
427 /* Often, the host and target charsets will be the same. */
428 if (!strcmp (from, to))
429 {
430 obstack_grow (output, bytes, num_bytes);
431 return;
432 }
234b45d4 433
6c7a06a3
TT
434 desc = iconv_open (to, from);
435 if (desc == (iconv_t) -1)
436 perror_with_name ("Converting character sets");
437 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 438
6c7a06a3
TT
439 inleft = num_bytes;
440 inp = (char *) bytes;
234b45d4 441
6c7a06a3 442 space_request = num_bytes;
234b45d4 443
6c7a06a3 444 while (inleft > 0)
234b45d4 445 {
6c7a06a3
TT
446 char *outp;
447 size_t outleft, r;
448 int old_size;
449
450 old_size = obstack_object_size (output);
451 obstack_blank (output, space_request);
452
453 outp = obstack_base (output) + old_size;
454 outleft = space_request;
455
0dd7fb99 456 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
457
458 /* Now make sure that the object on the obstack only includes
459 bytes we have converted. */
460 obstack_blank (output, - (int) outleft);
461
462 if (r == (size_t) -1)
463 {
464 switch (errno)
465 {
466 case EILSEQ:
467 {
468 int i;
469
470 /* Invalid input sequence. */
471 if (translit == translit_none)
472 error (_("Could not convert character to `%s' character set"),
473 to);
474
475 /* We emit escape sequence for the bytes, skip them,
476 and try again. */
477 for (i = 0; i < width; ++i)
478 {
479 char octal[5];
480
481 sprintf (octal, "\\%.3o", *inp & 0xff);
482 obstack_grow_str (output, octal);
483
484 ++inp;
485 --inleft;
486 }
487 }
488 break;
489
490 case E2BIG:
491 /* We ran out of space in the output buffer. Make it
492 bigger next time around. */
493 space_request *= 2;
494 break;
495
496 case EINVAL:
497 /* Incomplete input sequence. FIXME: ought to report this
498 to the caller somehow. */
499 inleft = 0;
500 break;
501
502 default:
503 perror_with_name ("Internal error while converting character sets");
504 }
505 }
234b45d4 506 }
234b45d4 507
6c7a06a3 508 do_cleanups (cleanups);
234b45d4
KB
509}
510
e33d66ec 511\f
e33d66ec 512
6c7a06a3
TT
513/* An iterator that returns host wchar_t's from a target string. */
514struct wchar_iterator
e33d66ec 515{
6c7a06a3
TT
516 /* The underlying iconv descriptor. */
517 iconv_t desc;
e33d66ec 518
6c7a06a3
TT
519 /* The input string. This is updated as convert characters. */
520 char *input;
521 /* The number of bytes remaining in the input. */
522 size_t bytes;
e33d66ec 523
6c7a06a3
TT
524 /* The width of an input character. */
525 size_t width;
e33d66ec 526
6c7a06a3
TT
527 /* The output buffer and its size. */
528 gdb_wchar_t *out;
529 size_t out_size;
530};
234b45d4 531
6c7a06a3
TT
532/* Create a new iterator. */
533struct wchar_iterator *
534make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
535 size_t width)
234b45d4 536{
6c7a06a3
TT
537 struct wchar_iterator *result;
538 iconv_t desc;
234b45d4 539
732f6a93 540 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
541 if (desc == (iconv_t) -1)
542 perror_with_name ("Converting character sets");
234b45d4 543
6c7a06a3
TT
544 result = XNEW (struct wchar_iterator);
545 result->desc = desc;
546 result->input = (char *) input;
547 result->bytes = bytes;
548 result->width = width;
234b45d4 549
6c7a06a3
TT
550 result->out = XNEW (gdb_wchar_t);
551 result->out_size = 1;
234b45d4 552
6c7a06a3 553 return result;
e33d66ec 554}
234b45d4 555
e33d66ec 556static void
6c7a06a3 557do_cleanup_iterator (void *p)
e33d66ec 558{
6c7a06a3 559 struct wchar_iterator *iter = p;
234b45d4 560
6c7a06a3
TT
561 iconv_close (iter->desc);
562 xfree (iter->out);
563 xfree (iter);
234b45d4
KB
564}
565
6c7a06a3
TT
566struct cleanup *
567make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 568{
6c7a06a3 569 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 570}
234b45d4 571
6c7a06a3
TT
572int
573wchar_iterate (struct wchar_iterator *iter,
574 enum wchar_iterate_result *out_result,
575 gdb_wchar_t **out_chars,
576 const gdb_byte **ptr,
577 size_t *len)
578{
579 size_t out_request;
580
581 /* Try to convert some characters. At first we try to convert just
582 a single character. The reason for this is that iconv does not
583 necessarily update its outgoing arguments when it encounters an
584 invalid input sequence -- but we want to reliably report this to
585 our caller so it can emit an escape sequence. */
586 out_request = 1;
587 while (iter->bytes > 0)
e33d66ec 588 {
6c7a06a3
TT
589 char *outptr = (char *) &iter->out[0];
590 char *orig_inptr = iter->input;
591 size_t orig_in = iter->bytes;
592 size_t out_avail = out_request * sizeof (gdb_wchar_t);
593 size_t num;
594 gdb_wchar_t result;
595
0dd7fb99
TT
596 size_t r = iconv (iter->desc,
597 (ICONV_CONST char **) &iter->input, &iter->bytes,
6c7a06a3
TT
598 &outptr, &out_avail);
599 if (r == (size_t) -1)
600 {
601 switch (errno)
602 {
603 case EILSEQ:
604 /* Invalid input sequence. Skip it, and let the caller
605 know about it. */
606 *out_result = wchar_iterate_invalid;
607 *ptr = iter->input;
608 *len = iter->width;
609 iter->input += iter->width;
610 iter->bytes -= iter->width;
611 return 0;
612
613 case E2BIG:
614 /* We ran out of space. We still might have converted a
615 character; if so, return it. Otherwise, grow the
616 buffer and try again. */
617 if (out_avail < out_request * sizeof (gdb_wchar_t))
618 break;
619
620 ++out_request;
621 if (out_request > iter->out_size)
622 {
623 iter->out_size = out_request;
624 iter->out = xrealloc (iter->out,
625 out_request * sizeof (gdb_wchar_t));
626 }
627 continue;
628
629 case EINVAL:
630 /* Incomplete input sequence. Let the caller know, and
631 arrange for future calls to see EOF. */
632 *out_result = wchar_iterate_incomplete;
633 *ptr = iter->input;
634 *len = iter->bytes;
635 iter->bytes = 0;
636 return 0;
637
638 default:
639 perror_with_name ("Internal error while converting character sets");
640 }
641 }
642
643 /* We converted something. */
644 num = out_request - out_avail / sizeof (gdb_wchar_t);
645 *out_result = wchar_iterate_ok;
646 *out_chars = iter->out;
647 *ptr = orig_inptr;
648 *len = orig_in - iter->bytes;
649 return num;
e33d66ec 650 }
6c7a06a3
TT
651
652 /* Really done. */
653 *out_result = wchar_iterate_eof;
654 return -1;
234b45d4
KB
655}
656
e33d66ec 657\f
6c7a06a3 658/* The charset.c module initialization function. */
234b45d4 659
6c7a06a3 660extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 661
6c7a06a3
TT
662typedef char *char_ptr;
663DEF_VEC_P (char_ptr);
234b45d4 664
6c7a06a3 665static VEC (char_ptr) *charsets;
234b45d4 666
6c7a06a3 667#ifdef PHONY_ICONV
234b45d4 668
6c7a06a3
TT
669static void
670find_charset_names (void)
234b45d4 671{
6c7a06a3
TT
672 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
673 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
674}
675
6c7a06a3 676#else /* PHONY_ICONV */
fc3b640d
TT
677
678/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
679 provides different symbols in the static and dynamic libraries.
680 So, configure may see libiconvlist but not iconvlist. But, calling
681 iconvlist is the right thing to do and will work. Hence we do a
682 check here but unconditionally call iconvlist below. */
683#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 684
6c7a06a3
TT
685/* A helper function that adds some character sets to the vector of
686 all character sets. This is a callback function for iconvlist. */
687
688static int
689add_one (unsigned int count, const char *const *names, void *data)
234b45d4 690{
6c7a06a3 691 unsigned int i;
234b45d4 692
6c7a06a3
TT
693 for (i = 0; i < count; ++i)
694 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 695
6c7a06a3 696 return 0;
234b45d4
KB
697}
698
6c7a06a3
TT
699static void
700find_charset_names (void)
234b45d4 701{
6c7a06a3
TT
702 iconvlist (add_one, NULL);
703 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
704}
705
6c7a06a3 706#else
234b45d4 707
6c7a06a3
TT
708static void
709find_charset_names (void)
234b45d4 710{
732f6a93
TT
711 struct pex_obj *child;
712 char *args[3];
713 int err, status;
714 int fail = 1;
715
716 child = pex_init (0, "iconv", NULL);
717
718 args[0] = "iconv";
719 args[1] = "-l";
720 args[2] = NULL;
721 /* Note that we simply ignore errors here. */
722 if (!pex_run (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT, "iconv",
723 args, NULL, NULL, &err))
724 {
725 FILE *in = pex_read_output (child, 0);
726
727 /* POSIX says that iconv -l uses an unspecified format. We
728 parse the glibc and libiconv formats; feel free to add others
729 as needed. */
730 while (!feof (in))
731 {
732 /* The size of buf is chosen arbitrarily. */
733 char buf[1024];
734 char *start, *r;
735 int len, keep_going;
736
737 r = fgets (buf, sizeof (buf), in);
738 if (!r)
739 break;
740 len = strlen (r);
741 if (len <= 3)
742 continue;
743 /* Strip off the newline. */
744 --len;
745 /* Strip off one or two '/'s. glibc will print lines like
746 "8859_7//", but also "10646-1:1993/UCS4/". */
747 if (buf[len - 1] == '/')
748 --len;
749 if (buf[len - 1] == '/')
750 --len;
751 buf[len] = '\0';
752
753 /* libiconv will print multiple entries per line, separated
754 by spaces. */
755 start = buf;
756 while (1)
757 {
758 int keep_going;
759 char *p;
760
761 /* Find the next space, or end-of-line. */
762 for (p = start; *p && *p != ' '; ++p)
763 ;
764 /* Ignore an empty result. */
765 if (p == start)
766 break;
767 keep_going = *p;
768 *p = '\0';
769 VEC_safe_push (char_ptr, charsets, xstrdup (start));
770 if (!keep_going)
771 break;
772 /* Skip any extra spaces. */
773 for (start = p + 1; *start && *start == ' '; ++start)
774 ;
775 }
776 }
234b45d4 777
732f6a93
TT
778 if (pex_get_status (child, 1, &status)
779 && WIFEXITED (status) && !WEXITSTATUS (status))
780 fail = 0;
234b45d4 781
6c7a06a3 782 }
234b45d4 783
732f6a93 784 pex_free (child);
234b45d4 785
732f6a93
TT
786 if (fail)
787 {
788 /* Some error occurred, so drop the vector. */
789 int ix;
790 char *elt;
791 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
792 xfree (elt);
793 VEC_truncate (char_ptr, charsets, 0);
794 }
795 else
796 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 797}
234b45d4 798
fc3b640d 799#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 800#endif /* PHONY_ICONV */
234b45d4
KB
801
802void
803_initialize_charset (void)
804{
e33d66ec
EZ
805 struct cmd_list_element *new_cmd;
806
6c7a06a3
TT
807 /* The first element is always "auto"; then we skip it for the
808 commands where it is not allowed. */
732f6a93 809 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
810 find_charset_names ();
811
812 if (VEC_length (char_ptr, charsets) > 1)
813 charset_enum = (const char **) VEC_address (char_ptr, charsets);
814 else
815 charset_enum = default_charset_names;
816
817#ifndef PHONY_ICONV
818#ifdef HAVE_LANGINFO_CODESET
819 auto_host_charset_name = nl_langinfo (CODESET);
58720494 820 /* Solaris will return `646' here -- but the Solaris iconv then
06be6983
TG
821 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
822 which GNU libiconv doesn't like (infinite loop). */
823 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 824 auto_host_charset_name = "ASCII";
6c7a06a3
TT
825 target_charset_name = auto_host_charset_name;
826
827 set_be_le_names ();
828#endif
829#endif
e33d66ec 830
7ab04401 831 add_setshow_enum_cmd ("charset", class_support,
6c7a06a3 832 &charset_enum[1], &host_charset_name, _("\
7ab04401
AC
833Set the host and target character sets."), _("\
834Show the host and target character sets."), _("\
3d263c1d
BI
835The `host character set' is the one used by the system GDB is running on.\n\
836The `target character set' is the one used by the program being debugged.\n\
837You may only use supersets of ASCII for your host character set; GDB does\n\
838not support any others.\n\
839To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
840 /* Note that the sfunc below needs to set
841 target_charset_name, because the 'set
842 charset' command sets two variables. */
843 set_charset_sfunc,
844 show_charset,
845 &setlist, &showlist);
846
847 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 848 charset_enum, &host_charset_name, _("\
7ab04401
AC
849Set the host character set."), _("\
850Show the host character set."), _("\
3d263c1d
BI
851The `host character set' is the one used by the system GDB is running on.\n\
852You may only use supersets of ASCII for your host character set; GDB does\n\
853not support any others.\n\
854To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 855 set_host_charset_sfunc,
920d2a44 856 show_host_charset_name,
7ab04401
AC
857 &setlist, &showlist);
858
859 add_setshow_enum_cmd ("target-charset", class_support,
6c7a06a3 860 &charset_enum[1], &target_charset_name, _("\
7ab04401
AC
861Set the target character set."), _("\
862Show the target character set."), _("\
3d263c1d
BI
863The `target character set' is the one used by the program being debugged.\n\
864GDB translates characters and strings between the host and target\n\
865character sets as needed.\n\
866To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 867 set_target_charset_sfunc,
920d2a44 868 show_target_charset_name,
7ab04401 869 &setlist, &showlist);
6c7a06a3
TT
870
871 add_setshow_enum_cmd ("target-wide-charset", class_support,
872 &charset_enum[1], &target_wide_charset_name,
873 _("\
874Set the target wide character set."), _("\
875Show the target wide character set."), _("\
876The `target wide character set' is the one used by the program being debugged.\n\
877In particular it is the encoding used by `wchar_t'.\n\
878GDB translates characters and strings between the host and target\n\
879character sets as needed.\n\
880To see a list of the character sets GDB supports, type\n\
881`set target-wide-charset'<TAB>"),
882 set_target_wide_charset_sfunc,
883 show_target_wide_charset_name,
884 &setlist, &showlist);
234b45d4 885}
This page took 0.646013 seconds and 4 git commands to generate.