2009-12-15 Tristan Gingold <gingold@adacore.com>
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
0fb0cc75 3 Copyright (C) 2001, 2003, 2007, 2008, 2009 Free Software Foundation, Inc.
234b45d4
KB
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
a9762ec7 9 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
a9762ec7 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
19
20#include "defs.h"
21#include "charset.h"
22#include "gdbcmd.h"
23#include "gdb_assert.h"
6c7a06a3 24#include "gdb_obstack.h"
732f6a93 25#include "gdb_wait.h"
6c7a06a3
TT
26#include "charset-list.h"
27#include "vec.h"
40b5c9fb 28#include "environ.h"
234b45d4
KB
29
30#include <stddef.h>
4ef3f3be 31#include "gdb_string.h"
234b45d4
KB
32#include <ctype.h>
33
234b45d4
KB
34\f
/* How GDB's character set support works

   GDB has three global settings:

   - The `current host character set' is the character set GDB should
     use in talking to the user, and which (hopefully) the user's
     terminal knows how to display properly.  Most users should not
     change this.

   - The `current target character set' is the character set the
     program being debugged uses.

   - The `current target wide character set' is the wide character set
     the program being debugged uses, that is, the encoding used for
     wchar_t.

   There are commands to set each of these, and mechanisms for
   choosing reasonable default values.  GDB has a global list of
   character sets that it can use as its host or target character
   sets.

   The header file `charset.h' declares various functions that
   different pieces of GDB need to perform tasks like:

   - printing target strings and characters to the user's terminal
     (mostly target->host conversions),

   - building target-appropriate representations of strings and
     characters the user enters in expressions (mostly host->target
     conversions),

   and so on.

   To avoid excessive code duplication and maintenance efforts,
   GDB simply requires a capable iconv function.  Users on platforms
   without a suitable iconv can use the GNU iconv library.  */
234b45d4
KB
71
72\f
6c7a06a3 73#ifdef PHONY_ICONV
234b45d4 74
6c7a06a3
TT
/* PHONY_ICONV configuration: supply a minimal stand-in for iconv and
   advertise exactly one character set.  */

/* Every default charset collapses to ISO-8859-1, and the list of
   known names contains only that entry.  */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Drop any macro versions of the iconv API so that the functions
   defined below are the ones that get used; a conversion descriptor
   is just an int here.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#undef iconv
#undef iconv_close

/* The phony iconv takes a `const char **' input argument.  */
#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems lack EILSEQ, so provide one.  It must not be EINVAL,
   because callers of `iconv' want to tell EINVAL and EILSEQ apart.
   This is what iconv.h from libiconv does as well.  Note that wchar.h
   may also define EILSEQ, so this has to come after wchar.h has been
   included, which happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif
102
6c7a06a3
TT
/* Phony iconv_open.  The source charset FROM must be UTF-32BE,
   wchar_t or the host charset, and the destination TO must be
   wchar_t or the host charset; any other request yields -1.

   The returned "descriptor" is 1 when converting from UTF-32BE and 0
   otherwise; the phony iconv uses it as a flag.  */

iconv_t
iconv_open (const char *to, const char *from)
{
  int from_utf32be = strcmp (from, "UTF-32BE") == 0;
  int from_ok = (from_utf32be
                 || strcmp (from, "wchar_t") == 0
                 || strcmp (from, GDB_DEFAULT_HOST_CHARSET) == 0);
  int to_ok = (strcmp (to, "wchar_t") == 0
               || strcmp (to, GDB_DEFAULT_HOST_CHARSET) == 0);

  if (!from_ok || !to_ok)
    return -1;

  return from_utf32be;
}
234b45d4 118
6c7a06a3
TT
/* Phony iconv_close.  The phony descriptor is a plain flag, so there
   is nothing to release; this always returns 0.  */

int
iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
234b45d4 124
/* Phony iconv.  UTF_FLAG is the value returned by the phony
   iconv_open: non-zero means the input is UTF-32BE, zero means the
   input bytes are copied to the output unchanged.

   *INBUF / *INBYTESLEFT describe the remaining input and
   *OUTBUF / *OUTBYTESLEFT the remaining output space; all four are
   advanced past whatever was converted, even on failure.

   Returns 0 on success.  On failure returns (size_t) -1 and sets
   errno to:
     EILSEQ - a UTF-32BE code point is >= 256 and so has no
              single-byte representation;
     E2BIG  - the output buffer is full;
     EINVAL - the UTF-32BE input ends in the middle of a character.  */

size_t
iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }

          /* Never write past the end of the output buffer.  */
          if (*outbytesleft == 0)
            {
              errno = E2BIG;
              return (size_t) -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* Consume the whole four-byte character.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Only leftover bytes indicate a truncated character; fully
         consumed input is a success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      if (*inbytesleft)
        {
          errno = E2BIG;
          return (size_t) -1;
        }
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 184
6c7a06a3 185#endif
234b45d4
KB
186
187
188\f
189/* The global lists of character sets and translations. */
190
191
e33d66ec
EZ
192#ifndef GDB_DEFAULT_TARGET_CHARSET
193#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
194#endif
195
6c7a06a3 196#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 197#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
198#endif
199
200static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
201static const char *host_charset_name = "auto";
920d2a44
AC
202static void
203show_host_charset_name (struct ui_file *file, int from_tty,
204 struct cmd_list_element *c,
205 const char *value)
206{
6c7a06a3
TT
207 if (!strcmp (value, "auto"))
208 fprintf_filtered (file,
209 _("The host character set is \"auto; currently %s\".\n"),
210 auto_host_charset_name);
211 else
212 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
213}
214
e33d66ec 215static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
920d2a44
AC
216static void
217show_target_charset_name (struct ui_file *file, int from_tty,
218 struct cmd_list_element *c, const char *value)
219{
220 fprintf_filtered (file, _("The target character set is \"%s\".\n"),
221 value);
222}
223
6c7a06a3
TT
224static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;
225static void
226show_target_wide_charset_name (struct ui_file *file, int from_tty,
227 struct cmd_list_element *c, const char *value)
e33d66ec 228{
6c7a06a3
TT
229 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
230 value);
231}
e33d66ec 232
6c7a06a3 233static const char *default_charset_names[] =
e33d66ec 234{
6c7a06a3 235 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
236 0
237};
234b45d4 238
6c7a06a3 239static const char **charset_enum;
234b45d4 240
6c7a06a3
TT
241\f
242/* If the target wide character set has big- or little-endian
243 variants, these are the corresponding names. */
244static const char *target_wide_charset_be_name;
245static const char *target_wide_charset_le_name;
234b45d4 246
6c7a06a3
TT
247/* A helper function for validate which sets the target wide big- and
248 little-endian character set names, if possible. */
234b45d4 249
6c7a06a3
TT
250static void
251set_be_le_names (void)
234b45d4 252{
6c7a06a3 253 int i, len;
234b45d4 254
6c7a06a3
TT
255 target_wide_charset_le_name = NULL;
256 target_wide_charset_be_name = NULL;
234b45d4 257
6c7a06a3
TT
258 len = strlen (target_wide_charset_name);
259 for (i = 0; charset_enum[i]; ++i)
260 {
261 if (strncmp (target_wide_charset_name, charset_enum[i], len))
262 continue;
263 if ((charset_enum[i][len] == 'B'
264 || charset_enum[i][len] == 'L')
265 && charset_enum[i][len + 1] == 'E'
266 && charset_enum[i][len + 2] == '\0')
267 {
268 if (charset_enum[i][len] == 'B')
269 target_wide_charset_be_name = charset_enum[i];
270 else
271 target_wide_charset_le_name = charset_enum[i];
272 }
273 }
234b45d4
KB
274}
275
6c7a06a3
TT
276/* 'Set charset', 'set host-charset', 'set target-charset', 'set
277 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
278
279static void
6c7a06a3 280validate (void)
234b45d4 281{
6c7a06a3
TT
282 iconv_t desc;
283 const char *host_cset = host_charset ();
234b45d4 284
6c7a06a3
TT
285 desc = iconv_open (target_wide_charset_name, host_cset);
286 if (desc == (iconv_t) -1)
287 error ("Cannot convert between character sets `%s' and `%s'",
288 target_wide_charset_name, host_cset);
289 iconv_close (desc);
234b45d4 290
6c7a06a3
TT
291 desc = iconv_open (target_charset_name, host_cset);
292 if (desc == (iconv_t) -1)
293 error ("Cannot convert between character sets `%s' and `%s'",
294 target_charset_name, host_cset);
295 iconv_close (desc);
234b45d4 296
6c7a06a3 297 set_be_le_names ();
234b45d4
KB
298}
299
6c7a06a3
TT
300/* This is the sfunc for the 'set charset' command. */
301static void
302set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 303{
6c7a06a3
TT
304 /* CAREFUL: set the target charset here as well. */
305 target_charset_name = host_charset_name;
306 validate ();
234b45d4
KB
307}
308
6c7a06a3
TT
/* The sfunc for the 'set host-charset' command.  This is only a
   wrapper around validate, needed because an sfunc must have this
   particular signature.  */

static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  validate ();
}
317
6c7a06a3
TT
/* The sfunc for the 'set target-charset' command; a wrapper around
   validate with the signature an sfunc requires.  */

static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  validate ();
}
325
6c7a06a3
TT
/* The sfunc for the 'set target-wide-charset' command; a wrapper
   around validate with the signature an sfunc requires.  */

static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  validate ();
}
333
6c7a06a3
TT
334/* sfunc for the 'show charset' command. */
335static void
336show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
337 const char *name)
234b45d4 338{
6c7a06a3
TT
339 show_host_charset_name (file, from_tty, c, host_charset_name);
340 show_target_charset_name (file, from_tty, c, target_charset_name);
341 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
234b45d4
KB
342}
343
234b45d4 344\f
6c7a06a3 345/* Accessor functions. */
234b45d4 346
6c7a06a3
TT
347const char *
348host_charset (void)
234b45d4 349{
6c7a06a3
TT
350 if (!strcmp (host_charset_name, "auto"))
351 return auto_host_charset_name;
352 return host_charset_name;
234b45d4
KB
353}
354
6c7a06a3
TT
355const char *
356target_charset (void)
234b45d4 357{
6c7a06a3 358 return target_charset_name;
234b45d4 359}
234b45d4 360
6c7a06a3 361const char *
e17a4113 362target_wide_charset (enum bfd_endian byte_order)
234b45d4 363{
e17a4113 364 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 365 {
6c7a06a3
TT
366 if (target_wide_charset_be_name)
367 return target_wide_charset_be_name;
234b45d4 368 }
6c7a06a3 369 else
234b45d4 370 {
6c7a06a3
TT
371 if (target_wide_charset_le_name)
372 return target_wide_charset_le_name;
234b45d4
KB
373 }
374
6c7a06a3 375 return target_wide_charset_name;
234b45d4
KB
376}
377
234b45d4 378\f
6c7a06a3
TT
/* Host character set management.  For the time being, we assume that
   the host character set is some superset of ASCII.  */

/* Return the control character corresponding to the host character
   C.  '?' maps to DEL (0177); any other character is masked with
   0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
389
6c7a06a3
TT
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything that is not a decimal digit
   or a letter a-f / A-F trips an assertion.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions require a value representable as
     unsigned char (or EOF), and plain char may be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
403
234b45d4 404\f
6c7a06a3 405/* Public character management functions. */
234b45d4 406
6c7a06a3 407/* A cleanup function which is run to close an iconv descriptor. */
234b45d4 408
6c7a06a3
TT
409static void
410cleanup_iconv (void *p)
234b45d4 411{
6c7a06a3
TT
412 iconv_t *descp = p;
413 iconv_close (*descp);
234b45d4
KB
414}
415
6c7a06a3
TT
416void
417convert_between_encodings (const char *from, const char *to,
418 const gdb_byte *bytes, unsigned int num_bytes,
419 int width, struct obstack *output,
420 enum transliterations translit)
421{
422 iconv_t desc;
423 struct cleanup *cleanups;
424 size_t inleft;
425 char *inp;
426 unsigned int space_request;
427
428 /* Often, the host and target charsets will be the same. */
429 if (!strcmp (from, to))
430 {
431 obstack_grow (output, bytes, num_bytes);
432 return;
433 }
234b45d4 434
6c7a06a3
TT
435 desc = iconv_open (to, from);
436 if (desc == (iconv_t) -1)
437 perror_with_name ("Converting character sets");
438 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 439
6c7a06a3
TT
440 inleft = num_bytes;
441 inp = (char *) bytes;
234b45d4 442
6c7a06a3 443 space_request = num_bytes;
234b45d4 444
6c7a06a3 445 while (inleft > 0)
234b45d4 446 {
6c7a06a3
TT
447 char *outp;
448 size_t outleft, r;
449 int old_size;
450
451 old_size = obstack_object_size (output);
452 obstack_blank (output, space_request);
453
454 outp = obstack_base (output) + old_size;
455 outleft = space_request;
456
0dd7fb99 457 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
458
459 /* Now make sure that the object on the obstack only includes
460 bytes we have converted. */
461 obstack_blank (output, - (int) outleft);
462
463 if (r == (size_t) -1)
464 {
465 switch (errno)
466 {
467 case EILSEQ:
468 {
469 int i;
470
471 /* Invalid input sequence. */
472 if (translit == translit_none)
473 error (_("Could not convert character to `%s' character set"),
474 to);
475
476 /* We emit escape sequence for the bytes, skip them,
477 and try again. */
478 for (i = 0; i < width; ++i)
479 {
480 char octal[5];
481
482 sprintf (octal, "\\%.3o", *inp & 0xff);
483 obstack_grow_str (output, octal);
484
485 ++inp;
486 --inleft;
487 }
488 }
489 break;
490
491 case E2BIG:
492 /* We ran out of space in the output buffer. Make it
493 bigger next time around. */
494 space_request *= 2;
495 break;
496
497 case EINVAL:
498 /* Incomplete input sequence. FIXME: ought to report this
499 to the caller somehow. */
500 inleft = 0;
501 break;
502
503 default:
504 perror_with_name ("Internal error while converting character sets");
505 }
506 }
234b45d4 507 }
234b45d4 508
6c7a06a3 509 do_cleanups (cleanups);
234b45d4
KB
510}
511
e33d66ec 512\f
e33d66ec 513
6c7a06a3
TT
514/* An iterator that returns host wchar_t's from a target string. */
515struct wchar_iterator
e33d66ec 516{
6c7a06a3
TT
517 /* The underlying iconv descriptor. */
518 iconv_t desc;
e33d66ec 519
6c7a06a3
TT
520 /* The input string. This is updated as convert characters. */
521 char *input;
522 /* The number of bytes remaining in the input. */
523 size_t bytes;
e33d66ec 524
6c7a06a3
TT
525 /* The width of an input character. */
526 size_t width;
e33d66ec 527
6c7a06a3
TT
528 /* The output buffer and its size. */
529 gdb_wchar_t *out;
530 size_t out_size;
531};
234b45d4 532
6c7a06a3
TT
533/* Create a new iterator. */
534struct wchar_iterator *
535make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
536 size_t width)
234b45d4 537{
6c7a06a3
TT
538 struct wchar_iterator *result;
539 iconv_t desc;
234b45d4 540
732f6a93 541 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
542 if (desc == (iconv_t) -1)
543 perror_with_name ("Converting character sets");
234b45d4 544
6c7a06a3
TT
545 result = XNEW (struct wchar_iterator);
546 result->desc = desc;
547 result->input = (char *) input;
548 result->bytes = bytes;
549 result->width = width;
234b45d4 550
6c7a06a3
TT
551 result->out = XNEW (gdb_wchar_t);
552 result->out_size = 1;
234b45d4 553
6c7a06a3 554 return result;
e33d66ec 555}
234b45d4 556
e33d66ec 557static void
6c7a06a3 558do_cleanup_iterator (void *p)
e33d66ec 559{
6c7a06a3 560 struct wchar_iterator *iter = p;
234b45d4 561
6c7a06a3
TT
562 iconv_close (iter->desc);
563 xfree (iter->out);
564 xfree (iter);
234b45d4
KB
565}
566
6c7a06a3
TT
567struct cleanup *
568make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 569{
6c7a06a3 570 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 571}
234b45d4 572
6c7a06a3
TT
573int
574wchar_iterate (struct wchar_iterator *iter,
575 enum wchar_iterate_result *out_result,
576 gdb_wchar_t **out_chars,
577 const gdb_byte **ptr,
578 size_t *len)
579{
580 size_t out_request;
581
582 /* Try to convert some characters. At first we try to convert just
583 a single character. The reason for this is that iconv does not
584 necessarily update its outgoing arguments when it encounters an
585 invalid input sequence -- but we want to reliably report this to
586 our caller so it can emit an escape sequence. */
587 out_request = 1;
588 while (iter->bytes > 0)
e33d66ec 589 {
6c7a06a3
TT
590 char *outptr = (char *) &iter->out[0];
591 char *orig_inptr = iter->input;
592 size_t orig_in = iter->bytes;
593 size_t out_avail = out_request * sizeof (gdb_wchar_t);
594 size_t num;
595 gdb_wchar_t result;
596
0dd7fb99
TT
597 size_t r = iconv (iter->desc,
598 (ICONV_CONST char **) &iter->input, &iter->bytes,
6c7a06a3
TT
599 &outptr, &out_avail);
600 if (r == (size_t) -1)
601 {
602 switch (errno)
603 {
604 case EILSEQ:
605 /* Invalid input sequence. Skip it, and let the caller
606 know about it. */
607 *out_result = wchar_iterate_invalid;
608 *ptr = iter->input;
609 *len = iter->width;
610 iter->input += iter->width;
611 iter->bytes -= iter->width;
612 return 0;
613
614 case E2BIG:
615 /* We ran out of space. We still might have converted a
616 character; if so, return it. Otherwise, grow the
617 buffer and try again. */
618 if (out_avail < out_request * sizeof (gdb_wchar_t))
619 break;
620
621 ++out_request;
622 if (out_request > iter->out_size)
623 {
624 iter->out_size = out_request;
625 iter->out = xrealloc (iter->out,
626 out_request * sizeof (gdb_wchar_t));
627 }
628 continue;
629
630 case EINVAL:
631 /* Incomplete input sequence. Let the caller know, and
632 arrange for future calls to see EOF. */
633 *out_result = wchar_iterate_incomplete;
634 *ptr = iter->input;
635 *len = iter->bytes;
636 iter->bytes = 0;
637 return 0;
638
639 default:
640 perror_with_name ("Internal error while converting character sets");
641 }
642 }
643
644 /* We converted something. */
645 num = out_request - out_avail / sizeof (gdb_wchar_t);
646 *out_result = wchar_iterate_ok;
647 *out_chars = iter->out;
648 *ptr = orig_inptr;
649 *len = orig_in - iter->bytes;
650 return num;
e33d66ec 651 }
6c7a06a3
TT
652
653 /* Really done. */
654 *out_result = wchar_iterate_eof;
655 return -1;
234b45d4
KB
656}
657
e33d66ec 658\f
6c7a06a3 659/* The charset.c module initialization function. */
234b45d4 660
6c7a06a3 661extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 662
6c7a06a3
TT
663typedef char *char_ptr;
664DEF_VEC_P (char_ptr);
234b45d4 665
6c7a06a3 666static VEC (char_ptr) *charsets;
234b45d4 667
6c7a06a3 668#ifdef PHONY_ICONV
234b45d4 669
6c7a06a3
TT
670static void
671find_charset_names (void)
234b45d4 672{
6c7a06a3
TT
673 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
674 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
675}
676
6c7a06a3 677#else /* PHONY_ICONV */
fc3b640d
TT
678
679/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
680 provides different symbols in the static and dynamic libraries.
681 So, configure may see libiconvlist but not iconvlist. But, calling
682 iconvlist is the right thing to do and will work. Hence we do a
683 check here but unconditionally call iconvlist below. */
684#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 685
6c7a06a3
TT
686/* A helper function that adds some character sets to the vector of
687 all character sets. This is a callback function for iconvlist. */
688
689static int
690add_one (unsigned int count, const char *const *names, void *data)
234b45d4 691{
6c7a06a3 692 unsigned int i;
234b45d4 693
6c7a06a3
TT
694 for (i = 0; i < count; ++i)
695 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 696
6c7a06a3 697 return 0;
234b45d4
KB
698}
699
6c7a06a3
TT
700static void
701find_charset_names (void)
234b45d4 702{
6c7a06a3
TT
703 iconvlist (add_one, NULL);
704 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
705}
706
6c7a06a3 707#else
234b45d4 708
40b5c9fb
DE
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introductory text.  A line that contains any of
     them, at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = ignore_lines; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
737
6c7a06a3
TT
738static void
739find_charset_names (void)
234b45d4 740{
732f6a93
TT
741 struct pex_obj *child;
742 char *args[3];
743 int err, status;
744 int fail = 1;
40b5c9fb
DE
745 struct gdb_environ *iconv_env;
746
747 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
748 a tty. We need to recognize it and ignore it. This text is subject
749 to translation, so force LANGUAGE=C. */
750 iconv_env = make_environ ();
751 init_environ (iconv_env);
752 set_in_environ (iconv_env, "LANGUAGE", "C");
753 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93
TT
754
755 child = pex_init (0, "iconv", NULL);
756
757 args[0] = "iconv";
758 args[1] = "-l";
759 args[2] = NULL;
760 /* Note that we simply ignore errors here. */
40b5c9fb
DE
761 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
762 "iconv", args, environ_vector (iconv_env),
763 NULL, NULL, &err))
732f6a93
TT
764 {
765 FILE *in = pex_read_output (child, 0);
766
767 /* POSIX says that iconv -l uses an unspecified format. We
768 parse the glibc and libiconv formats; feel free to add others
769 as needed. */
40b5c9fb 770
732f6a93
TT
771 while (!feof (in))
772 {
773 /* The size of buf is chosen arbitrarily. */
774 char buf[1024];
775 char *start, *r;
776 int len, keep_going;
777
778 r = fgets (buf, sizeof (buf), in);
779 if (!r)
780 break;
781 len = strlen (r);
782 if (len <= 3)
783 continue;
40b5c9fb
DE
784 if (ignore_line_p (r))
785 continue;
786
732f6a93
TT
787 /* Strip off the newline. */
788 --len;
789 /* Strip off one or two '/'s. glibc will print lines like
790 "8859_7//", but also "10646-1:1993/UCS4/". */
791 if (buf[len - 1] == '/')
792 --len;
793 if (buf[len - 1] == '/')
794 --len;
795 buf[len] = '\0';
796
797 /* libiconv will print multiple entries per line, separated
40b5c9fb
DE
798 by spaces. Older iconvs will print multiple entries per line,
799 indented by two spaces, and separated by ", "
800 (i.e. the human readable form). */
732f6a93
TT
801 start = buf;
802 while (1)
803 {
804 int keep_going;
805 char *p;
806
40b5c9fb
DE
807 /* Skip leading blanks. */
808 for (p = start; *p && *p == ' '; ++p)
809 ;
810 start = p;
811 /* Find the next space, comma, or end-of-line. */
812 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
813 ;
814 /* Ignore an empty result. */
815 if (p == start)
816 break;
817 keep_going = *p;
818 *p = '\0';
819 VEC_safe_push (char_ptr, charsets, xstrdup (start));
820 if (!keep_going)
821 break;
822 /* Skip any extra spaces. */
823 for (start = p + 1; *start && *start == ' '; ++start)
824 ;
825 }
826 }
234b45d4 827
732f6a93
TT
828 if (pex_get_status (child, 1, &status)
829 && WIFEXITED (status) && !WEXITSTATUS (status))
830 fail = 0;
234b45d4 831
6c7a06a3 832 }
234b45d4 833
732f6a93 834 pex_free (child);
40b5c9fb 835 free_environ (iconv_env);
234b45d4 836
732f6a93
TT
837 if (fail)
838 {
839 /* Some error occurred, so drop the vector. */
840 int ix;
841 char *elt;
842 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
843 xfree (elt);
844 VEC_truncate (char_ptr, charsets, 0);
845 }
846 else
847 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 848}
234b45d4 849
fc3b640d 850#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 851#endif /* PHONY_ICONV */
234b45d4
KB
852
853void
854_initialize_charset (void)
855{
e33d66ec
EZ
856 struct cmd_list_element *new_cmd;
857
6c7a06a3
TT
858 /* The first element is always "auto"; then we skip it for the
859 commands where it is not allowed. */
732f6a93 860 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
861 find_charset_names ();
862
863 if (VEC_length (char_ptr, charsets) > 1)
864 charset_enum = (const char **) VEC_address (char_ptr, charsets);
865 else
866 charset_enum = default_charset_names;
867
868#ifndef PHONY_ICONV
869#ifdef HAVE_LANGINFO_CODESET
870 auto_host_charset_name = nl_langinfo (CODESET);
58720494 871 /* Solaris will return `646' here -- but the Solaris iconv then
06be6983
TG
872 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
873 which GNU libiconv doesn't like (infinite loop). */
874 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 875 auto_host_charset_name = "ASCII";
6c7a06a3
TT
876 target_charset_name = auto_host_charset_name;
877
878 set_be_le_names ();
879#endif
880#endif
e33d66ec 881
7ab04401 882 add_setshow_enum_cmd ("charset", class_support,
6c7a06a3 883 &charset_enum[1], &host_charset_name, _("\
7ab04401
AC
884Set the host and target character sets."), _("\
885Show the host and target character sets."), _("\
3d263c1d
BI
886The `host character set' is the one used by the system GDB is running on.\n\
887The `target character set' is the one used by the program being debugged.\n\
888You may only use supersets of ASCII for your host character set; GDB does\n\
889not support any others.\n\
890To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
891 /* Note that the sfunc below needs to set
892 target_charset_name, because the 'set
893 charset' command sets two variables. */
894 set_charset_sfunc,
895 show_charset,
896 &setlist, &showlist);
897
898 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 899 charset_enum, &host_charset_name, _("\
7ab04401
AC
900Set the host character set."), _("\
901Show the host character set."), _("\
3d263c1d
BI
902The `host character set' is the one used by the system GDB is running on.\n\
903You may only use supersets of ASCII for your host character set; GDB does\n\
904not support any others.\n\
905To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 906 set_host_charset_sfunc,
920d2a44 907 show_host_charset_name,
7ab04401
AC
908 &setlist, &showlist);
909
910 add_setshow_enum_cmd ("target-charset", class_support,
6c7a06a3 911 &charset_enum[1], &target_charset_name, _("\
7ab04401
AC
912Set the target character set."), _("\
913Show the target character set."), _("\
3d263c1d
BI
914The `target character set' is the one used by the program being debugged.\n\
915GDB translates characters and strings between the host and target\n\
916character sets as needed.\n\
917To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 918 set_target_charset_sfunc,
920d2a44 919 show_target_charset_name,
7ab04401 920 &setlist, &showlist);
6c7a06a3
TT
921
922 add_setshow_enum_cmd ("target-wide-charset", class_support,
923 &charset_enum[1], &target_wide_charset_name,
924 _("\
925Set the target wide character set."), _("\
926Show the target wide character set."), _("\
927The `target wide character set' is the one used by the program being debugged.\n\
928In particular it is the encoding used by `wchar_t'.\n\
929GDB translates characters and strings between the host and target\n\
930character sets as needed.\n\
931To see a list of the character sets GDB supports, type\n\
932`set target-wide-charset'<TAB>"),
933 set_target_wide_charset_sfunc,
934 show_target_wide_charset_name,
935 &setlist, &showlist);
234b45d4 936}
This page took 0.485065 seconds and 4 git commands to generate.