* breakpoint.c (break_command_really): Change return type to int.
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
4c38e0a4
JB
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
234b45d4
KB
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
a9762ec7 10 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
a9762ec7 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
20
21#include "defs.h"
22#include "charset.h"
23#include "gdbcmd.h"
24#include "gdb_assert.h"
6c7a06a3 25#include "gdb_obstack.h"
732f6a93 26#include "gdb_wait.h"
6c7a06a3
TT
27#include "charset-list.h"
28#include "vec.h"
40b5c9fb 29#include "environ.h"
234b45d4
KB
30
31#include <stddef.h>
4ef3f3be 32#include "gdb_string.h"
234b45d4
KB
33#include <ctype.h>
34
234b45d4
KB
35\f
36/* How GDB's character set support works
37
6c7a06a3 38 GDB has three global settings:
234b45d4
KB
39
40 - The `current host character set' is the character set GDB should
41 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
42 terminal knows how to display properly. Most users should not
43 change this.
234b45d4
KB
44
45 - The `current target character set' is the character set the
46 program being debugged uses.
47
6c7a06a3
TT
48 - The `current target wide character set' is the wide character set
49 the program being debugged uses, that is, the encoding used for
50 wchar_t.
51
234b45d4
KB
52 There are commands to set each of these, and mechanisms for
53 choosing reasonable default values. GDB has a global list of
54 character sets that it can use as its host or target character
55 sets.
56
57 The header file `charset.h' declares various functions that
58 different pieces of GDB need to perform tasks like:
59
60 - printing target strings and characters to the user's terminal
61 (mostly target->host conversions),
62
63 - building target-appropriate representations of strings and
64 characters the user enters in expressions (mostly host->target
65 conversions),
66
6c7a06a3
TT
67 and so on.
68
69 To avoid excessive code duplication and maintenance efforts,
70 GDB simply requires a capable iconv function. Users on platforms
71 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
72
73\f
6c7a06a3 74#ifdef PHONY_ICONV
234b45d4 75
6c7a06a3
TT
76/* Provide a phony iconv that does as little as possible. Also,
77 arrange for there to be a single available character set. */
234b45d4 78
6c7a06a3
TT
79#undef GDB_DEFAULT_HOST_CHARSET
80#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
81#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
82#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
83#undef DEFAULT_CHARSET_NAMES
84#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
85
86#undef iconv_t
87#define iconv_t int
88#undef iconv_open
89#undef iconv
90#undef iconv_close
91
0dd7fb99
TT
92#undef ICONV_CONST
93#define ICONV_CONST const
94
76208fec 95/* Some systems don't have EILSEQ, so we define it here, but not as
e726d784
EZ
96 EINVAL, because callers of `iconv' want to distinguish EINVAL and
97 EILSEQ. This is what iconv.h from libiconv does as well. Note
98 that wchar.h may also define EILSEQ, so this needs to be after we
99 include wchar.h, which happens in defs.h through gdb_wchar.h. */
100#ifndef EILSEQ
101#define EILSEQ ENOENT
102#endif
103
6c7a06a3
TT
104iconv_t
105iconv_open (const char *to, const char *from)
106{
b8899f2b 107 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
6c7a06a3 108 We allow conversions to wchar_t and the host charset. */
b8899f2b 109 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
6c7a06a3
TT
110 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
111 return -1;
112 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
113 return -1;
234b45d4 114
b8899f2b 115 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
6c7a06a3 116 used as a flag in calls to iconv. */
b8899f2b 117 return !strcmp (from, "UTF-32BE");
6c7a06a3 118}
234b45d4 119
6c7a06a3
TT
120int
121iconv_close (iconv_t arg)
122{
123 return 0;
124}
234b45d4 125
/* Phony replacement for iconv.  UTF_FLAG is the descriptor returned
   by the phony iconv_open: when it is non-zero the input is treated
   as UTF-32BE and every 4-byte big-endian unit is narrowed to a
   single output byte; otherwise input bytes are copied to the output
   unchanged.

   *INBUF, *INBYTESLEFT, *OUTBUF and *OUTBYTESLEFT are advanced past
   whatever was consumed and produced.  Returns 0 on success (no
   conversion is ever non-reversible).  On failure returns
   (size_t) -1 with errno set to EILSEQ (a UTF-32BE unit does not fit
   in one byte), E2BIG (the output buffer is full) or EINVAL (the
   input ends in the middle of a UTF-32BE unit).  */

size_t
iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code unit.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return -1;
            }

          /* Never write past the end of the output buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* A whole 4-byte unit was consumed, not a single byte.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Only genuinely left-over bytes form an incomplete sequence;
         input that was consumed exactly is a success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  /* Input remaining after a plain copy means the output filled up.  */
  if (*inbytesleft)
    {
      errno = E2BIG;
      return -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 185
6c7a06a3 186#endif
234b45d4
KB
187
188
189\f
190/* The global lists of character sets and translations. */
191
192
e33d66ec
EZ
193#ifndef GDB_DEFAULT_TARGET_CHARSET
194#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
195#endif
196
6c7a06a3 197#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 198#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
199#endif
200
201static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
202static const char *host_charset_name = "auto";
920d2a44
AC
203static void
204show_host_charset_name (struct ui_file *file, int from_tty,
205 struct cmd_list_element *c,
206 const char *value)
207{
6c7a06a3
TT
208 if (!strcmp (value, "auto"))
209 fprintf_filtered (file,
210 _("The host character set is \"auto; currently %s\".\n"),
211 auto_host_charset_name);
212 else
213 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
214}
215
e33d66ec 216static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
920d2a44
AC
/* Print VALUE, the target character set setting, to FILE.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  fprintf_filtered (file,
                    _("The target character set is \"%s\".\n"),
                    value);
}
224
6c7a06a3
TT
225static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;
/* Print VALUE, the target wide character set setting, to FILE.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  fprintf_filtered (file,
                    _("The target wide character set is \"%s\".\n"),
                    value);
}
e33d66ec 233
6c7a06a3 234static const char *default_charset_names[] =
e33d66ec 235{
6c7a06a3 236 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
237 0
238};
234b45d4 239
6c7a06a3 240static const char **charset_enum;
234b45d4 241
6c7a06a3
TT
242\f
243/* If the target wide character set has big- or little-endian
244 variants, these are the corresponding names. */
245static const char *target_wide_charset_be_name;
246static const char *target_wide_charset_le_name;
234b45d4 247
6c7a06a3
TT
248/* A helper function for validate which sets the target wide big- and
249 little-endian character set names, if possible. */
234b45d4 250
6c7a06a3
TT
251static void
252set_be_le_names (void)
234b45d4 253{
6c7a06a3 254 int i, len;
234b45d4 255
6c7a06a3
TT
256 target_wide_charset_le_name = NULL;
257 target_wide_charset_be_name = NULL;
234b45d4 258
6c7a06a3
TT
259 len = strlen (target_wide_charset_name);
260 for (i = 0; charset_enum[i]; ++i)
261 {
262 if (strncmp (target_wide_charset_name, charset_enum[i], len))
263 continue;
264 if ((charset_enum[i][len] == 'B'
265 || charset_enum[i][len] == 'L')
266 && charset_enum[i][len + 1] == 'E'
267 && charset_enum[i][len + 2] == '\0')
268 {
269 if (charset_enum[i][len] == 'B')
270 target_wide_charset_be_name = charset_enum[i];
271 else
272 target_wide_charset_le_name = charset_enum[i];
273 }
274 }
234b45d4
KB
275}
276
6c7a06a3
TT
277/* 'Set charset', 'set host-charset', 'set target-charset', 'set
278 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
279
280static void
6c7a06a3 281validate (void)
234b45d4 282{
6c7a06a3
TT
283 iconv_t desc;
284 const char *host_cset = host_charset ();
234b45d4 285
6c7a06a3
TT
286 desc = iconv_open (target_wide_charset_name, host_cset);
287 if (desc == (iconv_t) -1)
288 error ("Cannot convert between character sets `%s' and `%s'",
289 target_wide_charset_name, host_cset);
290 iconv_close (desc);
234b45d4 291
6c7a06a3
TT
292 desc = iconv_open (target_charset_name, host_cset);
293 if (desc == (iconv_t) -1)
294 error ("Cannot convert between character sets `%s' and `%s'",
295 target_charset_name, host_cset);
296 iconv_close (desc);
234b45d4 297
6c7a06a3 298 set_be_le_names ();
234b45d4
KB
299}
300
6c7a06a3
TT
301/* This is the sfunc for the 'set charset' command. */
302static void
303set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 304{
6c7a06a3
TT
305 /* CAREFUL: set the target charset here as well. */
306 target_charset_name = host_charset_name;
307 validate ();
234b45d4
KB
308}
309
6c7a06a3
TT
/* The sfunc for the 'set host-charset' command.  This only exists to
   give validate the signature an sfunc must have.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  validate ();
}
318
6c7a06a3
TT
/* The sfunc for the 'set target-charset' command; a thin wrapper
   around validate.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  validate ();
}
326
6c7a06a3
TT
/* The sfunc for the 'set target-wide-charset' command; a thin wrapper
   around validate.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  validate ();
}
334
6c7a06a3
TT
335/* sfunc for the 'show charset' command. */
336static void
337show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
338 const char *name)
234b45d4 339{
6c7a06a3
TT
340 show_host_charset_name (file, from_tty, c, host_charset_name);
341 show_target_charset_name (file, from_tty, c, target_charset_name);
342 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
234b45d4
KB
343}
344
234b45d4 345\f
6c7a06a3 346/* Accessor functions. */
234b45d4 347
6c7a06a3
TT
348const char *
349host_charset (void)
234b45d4 350{
6c7a06a3
TT
351 if (!strcmp (host_charset_name, "auto"))
352 return auto_host_charset_name;
353 return host_charset_name;
234b45d4
KB
354}
355
6c7a06a3
TT
356const char *
357target_charset (void)
234b45d4 358{
6c7a06a3 359 return target_charset_name;
234b45d4 360}
234b45d4 361
6c7a06a3 362const char *
e17a4113 363target_wide_charset (enum bfd_endian byte_order)
234b45d4 364{
e17a4113 365 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 366 {
6c7a06a3
TT
367 if (target_wide_charset_be_name)
368 return target_wide_charset_be_name;
234b45d4 369 }
6c7a06a3 370 else
234b45d4 371 {
6c7a06a3
TT
372 if (target_wide_charset_le_name)
373 return target_wide_charset_le_name;
234b45d4
KB
374 }
375
6c7a06a3 376 return target_wide_charset_name;
234b45d4
KB
377}
378
234b45d4 379\f
6c7a06a3
TT
380/* Host character set management. For the time being, we assume that
381 the host character set is some superset of ASCII. */
234b45d4 382
6c7a06a3
TT
/* Map the host character C to a control character: '?' becomes
   0177, and any other character is masked with 0237.  */
char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
390
6c7a06a3
TT
/* Convert a host character, C, to its hex value (0 through 15).  C
   must already have been validated using isxdigit; anything that is
   not a decimal digit or a letter in a-f / A-F trips the assertion.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for EOF and values
     representable as unsigned char, and plain char may be signed.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
404
234b45d4 405\f
6c7a06a3 406/* Public character management functions. */
234b45d4 407
6c7a06a3 408/* A cleanup function which is run to close an iconv descriptor. */
234b45d4 409
6c7a06a3
TT
410static void
411cleanup_iconv (void *p)
234b45d4 412{
6c7a06a3
TT
413 iconv_t *descp = p;
414 iconv_close (*descp);
234b45d4
KB
415}
416
6c7a06a3
TT
417void
418convert_between_encodings (const char *from, const char *to,
419 const gdb_byte *bytes, unsigned int num_bytes,
420 int width, struct obstack *output,
421 enum transliterations translit)
422{
423 iconv_t desc;
424 struct cleanup *cleanups;
425 size_t inleft;
426 char *inp;
427 unsigned int space_request;
428
429 /* Often, the host and target charsets will be the same. */
430 if (!strcmp (from, to))
431 {
432 obstack_grow (output, bytes, num_bytes);
433 return;
434 }
234b45d4 435
6c7a06a3
TT
436 desc = iconv_open (to, from);
437 if (desc == (iconv_t) -1)
438 perror_with_name ("Converting character sets");
439 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 440
6c7a06a3
TT
441 inleft = num_bytes;
442 inp = (char *) bytes;
234b45d4 443
6c7a06a3 444 space_request = num_bytes;
234b45d4 445
6c7a06a3 446 while (inleft > 0)
234b45d4 447 {
6c7a06a3
TT
448 char *outp;
449 size_t outleft, r;
450 int old_size;
451
452 old_size = obstack_object_size (output);
453 obstack_blank (output, space_request);
454
455 outp = obstack_base (output) + old_size;
456 outleft = space_request;
457
0dd7fb99 458 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
459
460 /* Now make sure that the object on the obstack only includes
461 bytes we have converted. */
462 obstack_blank (output, - (int) outleft);
463
464 if (r == (size_t) -1)
465 {
466 switch (errno)
467 {
468 case EILSEQ:
469 {
470 int i;
471
472 /* Invalid input sequence. */
473 if (translit == translit_none)
474 error (_("Could not convert character to `%s' character set"),
475 to);
476
477 /* We emit escape sequence for the bytes, skip them,
478 and try again. */
479 for (i = 0; i < width; ++i)
480 {
481 char octal[5];
482
483 sprintf (octal, "\\%.3o", *inp & 0xff);
484 obstack_grow_str (output, octal);
485
486 ++inp;
487 --inleft;
488 }
489 }
490 break;
491
492 case E2BIG:
493 /* We ran out of space in the output buffer. Make it
494 bigger next time around. */
495 space_request *= 2;
496 break;
497
498 case EINVAL:
499 /* Incomplete input sequence. FIXME: ought to report this
500 to the caller somehow. */
501 inleft = 0;
502 break;
503
504 default:
505 perror_with_name ("Internal error while converting character sets");
506 }
507 }
234b45d4 508 }
234b45d4 509
6c7a06a3 510 do_cleanups (cleanups);
234b45d4
KB
511}
512
e33d66ec 513\f
e33d66ec 514
6c7a06a3
TT
515/* An iterator that returns host wchar_t's from a target string. */
516struct wchar_iterator
e33d66ec 517{
6c7a06a3
TT
518 /* The underlying iconv descriptor. */
519 iconv_t desc;
e33d66ec 520
6c7a06a3
TT
521 /* The input string. This is updated as convert characters. */
522 char *input;
523 /* The number of bytes remaining in the input. */
524 size_t bytes;
e33d66ec 525
6c7a06a3
TT
526 /* The width of an input character. */
527 size_t width;
e33d66ec 528
6c7a06a3
TT
529 /* The output buffer and its size. */
530 gdb_wchar_t *out;
531 size_t out_size;
532};
234b45d4 533
6c7a06a3
TT
534/* Create a new iterator. */
535struct wchar_iterator *
536make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
537 size_t width)
234b45d4 538{
6c7a06a3
TT
539 struct wchar_iterator *result;
540 iconv_t desc;
234b45d4 541
732f6a93 542 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
543 if (desc == (iconv_t) -1)
544 perror_with_name ("Converting character sets");
234b45d4 545
6c7a06a3
TT
546 result = XNEW (struct wchar_iterator);
547 result->desc = desc;
548 result->input = (char *) input;
549 result->bytes = bytes;
550 result->width = width;
234b45d4 551
6c7a06a3
TT
552 result->out = XNEW (gdb_wchar_t);
553 result->out_size = 1;
234b45d4 554
6c7a06a3 555 return result;
e33d66ec 556}
234b45d4 557
e33d66ec 558static void
6c7a06a3 559do_cleanup_iterator (void *p)
e33d66ec 560{
6c7a06a3 561 struct wchar_iterator *iter = p;
234b45d4 562
6c7a06a3
TT
563 iconv_close (iter->desc);
564 xfree (iter->out);
565 xfree (iter);
234b45d4
KB
566}
567
6c7a06a3
TT
568struct cleanup *
569make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 570{
6c7a06a3 571 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 572}
234b45d4 573
6c7a06a3
TT
574int
575wchar_iterate (struct wchar_iterator *iter,
576 enum wchar_iterate_result *out_result,
577 gdb_wchar_t **out_chars,
578 const gdb_byte **ptr,
579 size_t *len)
580{
581 size_t out_request;
582
583 /* Try to convert some characters. At first we try to convert just
584 a single character. The reason for this is that iconv does not
585 necessarily update its outgoing arguments when it encounters an
586 invalid input sequence -- but we want to reliably report this to
587 our caller so it can emit an escape sequence. */
588 out_request = 1;
589 while (iter->bytes > 0)
e33d66ec 590 {
6c7a06a3
TT
591 char *outptr = (char *) &iter->out[0];
592 char *orig_inptr = iter->input;
593 size_t orig_in = iter->bytes;
594 size_t out_avail = out_request * sizeof (gdb_wchar_t);
595 size_t num;
596 gdb_wchar_t result;
597
0dd7fb99
TT
598 size_t r = iconv (iter->desc,
599 (ICONV_CONST char **) &iter->input, &iter->bytes,
6c7a06a3
TT
600 &outptr, &out_avail);
601 if (r == (size_t) -1)
602 {
603 switch (errno)
604 {
605 case EILSEQ:
606 /* Invalid input sequence. Skip it, and let the caller
607 know about it. */
608 *out_result = wchar_iterate_invalid;
609 *ptr = iter->input;
610 *len = iter->width;
611 iter->input += iter->width;
612 iter->bytes -= iter->width;
613 return 0;
614
615 case E2BIG:
616 /* We ran out of space. We still might have converted a
617 character; if so, return it. Otherwise, grow the
618 buffer and try again. */
619 if (out_avail < out_request * sizeof (gdb_wchar_t))
620 break;
621
622 ++out_request;
623 if (out_request > iter->out_size)
624 {
625 iter->out_size = out_request;
626 iter->out = xrealloc (iter->out,
627 out_request * sizeof (gdb_wchar_t));
628 }
629 continue;
630
631 case EINVAL:
632 /* Incomplete input sequence. Let the caller know, and
633 arrange for future calls to see EOF. */
634 *out_result = wchar_iterate_incomplete;
635 *ptr = iter->input;
636 *len = iter->bytes;
637 iter->bytes = 0;
638 return 0;
639
640 default:
641 perror_with_name ("Internal error while converting character sets");
642 }
643 }
644
645 /* We converted something. */
646 num = out_request - out_avail / sizeof (gdb_wchar_t);
647 *out_result = wchar_iterate_ok;
648 *out_chars = iter->out;
649 *ptr = orig_inptr;
650 *len = orig_in - iter->bytes;
651 return num;
e33d66ec 652 }
6c7a06a3
TT
653
654 /* Really done. */
655 *out_result = wchar_iterate_eof;
656 return -1;
234b45d4
KB
657}
658
e33d66ec 659\f
6c7a06a3 660/* The charset.c module initialization function. */
234b45d4 661
6c7a06a3 662extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 663
6c7a06a3
TT
664typedef char *char_ptr;
665DEF_VEC_P (char_ptr);
234b45d4 666
6c7a06a3 667static VEC (char_ptr) *charsets;
234b45d4 668
6c7a06a3 669#ifdef PHONY_ICONV
234b45d4 670
6c7a06a3
TT
671static void
672find_charset_names (void)
234b45d4 673{
6c7a06a3
TT
674 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
675 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
676}
677
6c7a06a3 678#else /* PHONY_ICONV */
fc3b640d
TT
679
680/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
681 provides different symbols in the static and dynamic libraries.
682 So, configure may see libiconvlist but not iconvlist. But, calling
683 iconvlist is the right thing to do and will work. Hence we do a
684 check here but unconditionally call iconvlist below. */
685#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 686
6c7a06a3
TT
687/* A helper function that adds some character sets to the vector of
688 all character sets. This is a callback function for iconvlist. */
689
690static int
691add_one (unsigned int count, const char *const *names, void *data)
234b45d4 692{
6c7a06a3 693 unsigned int i;
234b45d4 694
6c7a06a3
TT
695 for (i = 0; i < count; ++i)
696 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 697
6c7a06a3 698 return 0;
234b45d4
KB
699}
700
6c7a06a3
TT
701static void
702find_charset_names (void)
234b45d4 703{
6c7a06a3
TT
704 iconvlist (add_one, NULL);
705 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
706}
707
6c7a06a3 708#else
234b45d4 709
40b5c9fb
DE
/* Return 1 if LINE (a line of output from the iconv program) should
   be ignored, and 0 otherwise.  Older iconv programs (e.g. 2.2.2)
   print a human readable introduction even when stdout is not a tty;
   newer versions leave it out in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any one of
     them, at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = ignore_lines;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
        return 1;
      ++fragment;
    }

  return 0;
}
738
6c7a06a3
TT
739static void
740find_charset_names (void)
234b45d4 741{
732f6a93
TT
742 struct pex_obj *child;
743 char *args[3];
744 int err, status;
745 int fail = 1;
40b5c9fb
DE
746 struct gdb_environ *iconv_env;
747
748 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
749 a tty. We need to recognize it and ignore it. This text is subject
750 to translation, so force LANGUAGE=C. */
751 iconv_env = make_environ ();
752 init_environ (iconv_env);
753 set_in_environ (iconv_env, "LANGUAGE", "C");
754 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93
TT
755
756 child = pex_init (0, "iconv", NULL);
757
758 args[0] = "iconv";
759 args[1] = "-l";
760 args[2] = NULL;
761 /* Note that we simply ignore errors here. */
40b5c9fb
DE
762 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
763 "iconv", args, environ_vector (iconv_env),
764 NULL, NULL, &err))
732f6a93
TT
765 {
766 FILE *in = pex_read_output (child, 0);
767
768 /* POSIX says that iconv -l uses an unspecified format. We
769 parse the glibc and libiconv formats; feel free to add others
770 as needed. */
40b5c9fb 771
732f6a93
TT
772 while (!feof (in))
773 {
774 /* The size of buf is chosen arbitrarily. */
775 char buf[1024];
776 char *start, *r;
777 int len, keep_going;
778
779 r = fgets (buf, sizeof (buf), in);
780 if (!r)
781 break;
782 len = strlen (r);
783 if (len <= 3)
784 continue;
40b5c9fb
DE
785 if (ignore_line_p (r))
786 continue;
787
732f6a93
TT
788 /* Strip off the newline. */
789 --len;
790 /* Strip off one or two '/'s. glibc will print lines like
791 "8859_7//", but also "10646-1:1993/UCS4/". */
792 if (buf[len - 1] == '/')
793 --len;
794 if (buf[len - 1] == '/')
795 --len;
796 buf[len] = '\0';
797
798 /* libiconv will print multiple entries per line, separated
40b5c9fb
DE
799 by spaces. Older iconvs will print multiple entries per line,
800 indented by two spaces, and separated by ", "
801 (i.e. the human readable form). */
732f6a93
TT
802 start = buf;
803 while (1)
804 {
805 int keep_going;
806 char *p;
807
40b5c9fb
DE
808 /* Skip leading blanks. */
809 for (p = start; *p && *p == ' '; ++p)
810 ;
811 start = p;
812 /* Find the next space, comma, or end-of-line. */
813 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
814 ;
815 /* Ignore an empty result. */
816 if (p == start)
817 break;
818 keep_going = *p;
819 *p = '\0';
820 VEC_safe_push (char_ptr, charsets, xstrdup (start));
821 if (!keep_going)
822 break;
823 /* Skip any extra spaces. */
824 for (start = p + 1; *start && *start == ' '; ++start)
825 ;
826 }
827 }
234b45d4 828
732f6a93
TT
829 if (pex_get_status (child, 1, &status)
830 && WIFEXITED (status) && !WEXITSTATUS (status))
831 fail = 0;
234b45d4 832
6c7a06a3 833 }
234b45d4 834
732f6a93 835 pex_free (child);
40b5c9fb 836 free_environ (iconv_env);
234b45d4 837
732f6a93
TT
838 if (fail)
839 {
840 /* Some error occurred, so drop the vector. */
841 int ix;
842 char *elt;
843 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
844 xfree (elt);
845 VEC_truncate (char_ptr, charsets, 0);
846 }
847 else
848 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 849}
234b45d4 850
fc3b640d 851#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 852#endif /* PHONY_ICONV */
234b45d4
KB
853
854void
855_initialize_charset (void)
856{
e33d66ec
EZ
857 struct cmd_list_element *new_cmd;
858
6c7a06a3
TT
859 /* The first element is always "auto"; then we skip it for the
860 commands where it is not allowed. */
732f6a93 861 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
862 find_charset_names ();
863
864 if (VEC_length (char_ptr, charsets) > 1)
865 charset_enum = (const char **) VEC_address (char_ptr, charsets);
866 else
867 charset_enum = default_charset_names;
868
869#ifndef PHONY_ICONV
870#ifdef HAVE_LANGINFO_CODESET
871 auto_host_charset_name = nl_langinfo (CODESET);
58720494 872 /* Solaris will return `646' here -- but the Solaris iconv then
06be6983
TG
873 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
874 which GNU libiconv doesn't like (infinite loop). */
875 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 876 auto_host_charset_name = "ASCII";
6c7a06a3
TT
877 target_charset_name = auto_host_charset_name;
878
879 set_be_le_names ();
880#endif
881#endif
e33d66ec 882
7ab04401 883 add_setshow_enum_cmd ("charset", class_support,
6c7a06a3 884 &charset_enum[1], &host_charset_name, _("\
7ab04401
AC
885Set the host and target character sets."), _("\
886Show the host and target character sets."), _("\
3d263c1d
BI
887The `host character set' is the one used by the system GDB is running on.\n\
888The `target character set' is the one used by the program being debugged.\n\
889You may only use supersets of ASCII for your host character set; GDB does\n\
890not support any others.\n\
891To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
892 /* Note that the sfunc below needs to set
893 target_charset_name, because the 'set
894 charset' command sets two variables. */
895 set_charset_sfunc,
896 show_charset,
897 &setlist, &showlist);
898
899 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 900 charset_enum, &host_charset_name, _("\
7ab04401
AC
901Set the host character set."), _("\
902Show the host character set."), _("\
3d263c1d
BI
903The `host character set' is the one used by the system GDB is running on.\n\
904You may only use supersets of ASCII for your host character set; GDB does\n\
905not support any others.\n\
906To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 907 set_host_charset_sfunc,
920d2a44 908 show_host_charset_name,
7ab04401
AC
909 &setlist, &showlist);
910
911 add_setshow_enum_cmd ("target-charset", class_support,
6c7a06a3 912 &charset_enum[1], &target_charset_name, _("\
7ab04401
AC
913Set the target character set."), _("\
914Show the target character set."), _("\
3d263c1d
BI
915The `target character set' is the one used by the program being debugged.\n\
916GDB translates characters and strings between the host and target\n\
917character sets as needed.\n\
918To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 919 set_target_charset_sfunc,
920d2a44 920 show_target_charset_name,
7ab04401 921 &setlist, &showlist);
6c7a06a3
TT
922
923 add_setshow_enum_cmd ("target-wide-charset", class_support,
924 &charset_enum[1], &target_wide_charset_name,
925 _("\
926Set the target wide character set."), _("\
927Show the target wide character set."), _("\
928The `target wide character set' is the one used by the program being debugged.\n\
929In particular it is the encoding used by `wchar_t'.\n\
930GDB translates characters and strings between the host and target\n\
931character sets as needed.\n\
932To see a list of the character sets GDB supports, type\n\
933`set target-wide-charset'<TAB>"),
934 set_target_wide_charset_sfunc,
935 show_target_wide_charset_name,
936 &setlist, &showlist);
234b45d4 937}
This page took 0.642642 seconds and 4 git commands to generate.