* linux-low.c (linux_write_memory): Bail out early if peeking
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
4c38e0a4
JB
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
234b45d4
KB
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
a9762ec7 10 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
a9762ec7 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
20
21#include "defs.h"
22#include "charset.h"
23#include "gdbcmd.h"
24#include "gdb_assert.h"
6c7a06a3 25#include "gdb_obstack.h"
732f6a93 26#include "gdb_wait.h"
6c7a06a3
TT
27#include "charset-list.h"
28#include "vec.h"
40b5c9fb 29#include "environ.h"
f870a310 30#include "arch-utils.h"
234b45d4
KB
31
32#include <stddef.h>
4ef3f3be 33#include "gdb_string.h"
234b45d4
KB
34#include <ctype.h>
35
234b45d4
KB
36\f
37/* How GDB's character set support works
38
6c7a06a3 39 GDB has three global settings:
234b45d4
KB
40
41 - The `current host character set' is the character set GDB should
42 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
43 terminal knows how to display properly. Most users should not
44 change this.
234b45d4
KB
45
46 - The `current target character set' is the character set the
47 program being debugged uses.
48
6c7a06a3
TT
49 - The `current target wide character set' is the wide character set
50 the program being debugged uses, that is, the encoding used for
51 wchar_t.
52
234b45d4
KB
53 There are commands to set each of these, and mechanisms for
54 choosing reasonable default values. GDB has a global list of
55 character sets that it can use as its host or target character
56 sets.
57
58 The header file `charset.h' declares various functions that
59 different pieces of GDB need to perform tasks like:
60
61 - printing target strings and characters to the user's terminal
62 (mostly target->host conversions),
63
64 - building target-appropriate representations of strings and
65 characters the user enters in expressions (mostly host->target
66 conversions),
67
6c7a06a3
TT
68 and so on.
69
70 To avoid excessive code duplication and maintenance efforts,
71 GDB simply requires a capable iconv function. Users on platforms
72 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
73
74\f
6c7a06a3 75#ifdef PHONY_ICONV
234b45d4 76
6c7a06a3
TT
77/* Provide a phony iconv that does as little as possible. Also,
78 arrange for there to be a single available character set. */
234b45d4 79
6c7a06a3
TT
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

#undef iconv_t
#define iconv_t int
#undef iconv_open
#undef iconv
#undef iconv_close

#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems don't have EILSEQ, so we define it here, but not as
   EINVAL, because callers of `iconv' want to distinguish EINVAL and
   EILSEQ.  This is what iconv.h from libiconv does as well.  Note
   that wchar.h may also define EILSEQ, so this needs to be after we
   include wchar.h, which happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif

/* Phony iconv_open.  Conversions are accepted from UTF-32BE, wchar_t
   and the host charset, and to wchar_t and the host charset; anything
   else yields -1.  On success the "descriptor" is simply a flag: 1
   when converting from UTF-32BE, 0 otherwise.  */

iconv_t
iconv_open (const char *to, const char *from)
{
  if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
      && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
    return -1;
  if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
    return -1;

  /* This value is used as a flag in calls to iconv.  */
  return !strcmp (from, "UTF-32BE");
}

/* Phony iconv_close.  There is nothing to release; always returns 0.  */

int
iconv_close (iconv_t arg)
{
  return 0;
}

/* Phony iconv.  UTF_FLAG is the value returned by iconv_open.

   When UTF_FLAG is non-zero the input is read as big-endian 4-byte
   units, each of which must be below 256 and is written as one output
   byte.  Otherwise input bytes are copied unchanged to the output.

   The in/out pointers and counts are advanced past whatever was
   converted.  Returns 0 on success.  On failure returns (size_t) -1
   and sets errno to EILSEQ (unit >= 256), E2BIG (output full) or
   EINVAL (trailing partial 4-byte unit).  */

size_t
iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return -1;
            }
          /* Never write past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return -1;
            }
          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* One whole 4-byte unit has been consumed.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }
      /* Only a leftover of 1..3 bytes is an incomplete sequence; a
         fully consumed input is a success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  if (*inbytesleft)
    {
      errno = E2BIG;
      return -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 186
6c7a06a3 187#endif
234b45d4
KB
188
189
190\f
191/* The global lists of character sets and translations. */
192
193
e33d66ec
EZ
194#ifndef GDB_DEFAULT_TARGET_CHARSET
195#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
196#endif
197
6c7a06a3 198#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 199#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
200#endif
201
202static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
203static const char *host_charset_name = "auto";
920d2a44
AC
204static void
205show_host_charset_name (struct ui_file *file, int from_tty,
206 struct cmd_list_element *c,
207 const char *value)
208{
6c7a06a3
TT
209 if (!strcmp (value, "auto"))
210 fprintf_filtered (file,
211 _("The host character set is \"auto; currently %s\".\n"),
212 auto_host_charset_name);
213 else
214 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
215}
216
f870a310 217static const char *target_charset_name = "auto";
920d2a44
AC
/* "show target-charset" callback.  Prints VALUE to FILE; when VALUE
   is "auto" the current architecture's automatic charset is shown
   too.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
231
f870a310 232static const char *target_wide_charset_name = "auto";
6c7a06a3
TT
/* "show target-wide-charset" callback.  Prints VALUE to FILE; when
   VALUE is "auto" the current architecture's automatic wide charset
   is shown too.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
e33d66ec 246
6c7a06a3 247static const char *default_charset_names[] =
e33d66ec 248{
6c7a06a3 249 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
250 0
251};
234b45d4 252
6c7a06a3 253static const char **charset_enum;
234b45d4 254
6c7a06a3
TT
255\f
256/* If the target wide character set has big- or little-endian
257 variants, these are the corresponding names. */
258static const char *target_wide_charset_be_name;
259static const char *target_wide_charset_le_name;
234b45d4 260
f870a310
TT
261/* The architecture for which the BE- and LE-names are valid. */
262static struct gdbarch *be_le_arch;
263
264/* A helper function which sets the target wide big- and little-endian
265 character set names, if possible. */
234b45d4 266
6c7a06a3 267static void
f870a310 268set_be_le_names (struct gdbarch *gdbarch)
234b45d4 269{
6c7a06a3 270 int i, len;
f870a310
TT
271 const char *target_wide;
272
273 if (be_le_arch == gdbarch)
274 return;
275 be_le_arch = gdbarch;
234b45d4 276
6c7a06a3
TT
277 target_wide_charset_le_name = NULL;
278 target_wide_charset_be_name = NULL;
234b45d4 279
f870a310
TT
280 target_wide = target_wide_charset_name;
281 if (!strcmp (target_wide, "auto"))
282 target_wide = gdbarch_auto_wide_charset (gdbarch);
283
284 len = strlen (target_wide);
6c7a06a3
TT
285 for (i = 0; charset_enum[i]; ++i)
286 {
f870a310 287 if (strncmp (target_wide, charset_enum[i], len))
6c7a06a3
TT
288 continue;
289 if ((charset_enum[i][len] == 'B'
290 || charset_enum[i][len] == 'L')
291 && charset_enum[i][len + 1] == 'E'
292 && charset_enum[i][len + 2] == '\0')
293 {
294 if (charset_enum[i][len] == 'B')
295 target_wide_charset_be_name = charset_enum[i];
296 else
297 target_wide_charset_le_name = charset_enum[i];
298 }
299 }
234b45d4
KB
300}
301
6c7a06a3
TT
302/* 'Set charset', 'set host-charset', 'set target-charset', 'set
303 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
304
305static void
f870a310 306validate (struct gdbarch *gdbarch)
234b45d4 307{
6c7a06a3
TT
308 iconv_t desc;
309 const char *host_cset = host_charset ();
f870a310
TT
310 const char *target_cset = target_charset (gdbarch);
311 const char *target_wide_cset = target_wide_charset_name;
312 if (!strcmp (target_wide_cset, "auto"))
313 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
234b45d4 314
f870a310 315 desc = iconv_open (target_wide_cset, host_cset);
6c7a06a3
TT
316 if (desc == (iconv_t) -1)
317 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 318 target_wide_cset, host_cset);
6c7a06a3 319 iconv_close (desc);
234b45d4 320
f870a310 321 desc = iconv_open (target_cset, host_cset);
6c7a06a3
TT
322 if (desc == (iconv_t) -1)
323 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 324 target_cset, host_cset);
6c7a06a3 325 iconv_close (desc);
234b45d4 326
f870a310
TT
327 /* Clear the cache. */
328 be_le_arch = NULL;
234b45d4
KB
329}
330
6c7a06a3
TT
331/* This is the sfunc for the 'set charset' command. */
332static void
333set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 334{
6c7a06a3
TT
335 /* CAREFUL: set the target charset here as well. */
336 target_charset_name = host_charset_name;
f870a310 337 validate (get_current_arch ());
234b45d4
KB
338}
339
6c7a06a3
TT
/* sfunc for 'set host-charset'.  This wrapper exists only because
   the callback must have this exact signature; it validates the
   settings against the current architecture.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
348
6c7a06a3
TT
/* sfunc for 'set target-charset'; validates the settings against the
   current architecture.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
356
6c7a06a3
TT
/* sfunc for 'set target-wide-charset'; validates the settings against
   the current architecture.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
364
6c7a06a3
TT
365/* sfunc for the 'show charset' command. */
366static void
367show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
368 const char *name)
234b45d4 369{
6c7a06a3
TT
370 show_host_charset_name (file, from_tty, c, host_charset_name);
371 show_target_charset_name (file, from_tty, c, target_charset_name);
372 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
234b45d4
KB
373}
374
234b45d4 375\f
6c7a06a3 376/* Accessor functions. */
234b45d4 377
6c7a06a3
TT
378const char *
379host_charset (void)
234b45d4 380{
6c7a06a3
TT
381 if (!strcmp (host_charset_name, "auto"))
382 return auto_host_charset_name;
383 return host_charset_name;
234b45d4
KB
384}
385
6c7a06a3 386const char *
f870a310 387target_charset (struct gdbarch *gdbarch)
234b45d4 388{
f870a310
TT
389 if (!strcmp (target_charset_name, "auto"))
390 return gdbarch_auto_charset (gdbarch);
6c7a06a3 391 return target_charset_name;
234b45d4 392}
234b45d4 393
6c7a06a3 394const char *
f870a310 395target_wide_charset (struct gdbarch *gdbarch)
234b45d4 396{
f870a310
TT
397 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
398
399 set_be_le_names (gdbarch);
e17a4113 400 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 401 {
6c7a06a3
TT
402 if (target_wide_charset_be_name)
403 return target_wide_charset_be_name;
234b45d4 404 }
6c7a06a3 405 else
234b45d4 406 {
6c7a06a3
TT
407 if (target_wide_charset_le_name)
408 return target_wide_charset_le_name;
234b45d4
KB
409 }
410
f870a310
TT
411 if (!strcmp (target_wide_charset_name, "auto"))
412 return gdbarch_auto_wide_charset (gdbarch);
413
6c7a06a3 414 return target_wide_charset_name;
234b45d4
KB
415}
416
234b45d4 417\f
6c7a06a3
TT
418/* Host character set management. For the time being, we assume that
419 the host character set is some superset of ASCII. */
234b45d4 420
6c7a06a3
TT
/* Map the host character C to its control-character counterpart:
   '?' becomes DEL (0177); any other character is masked with 0237.  */
char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
428
6c7a06a3
TT
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything that is not a hex digit
   trips the assertion.  Returns a value in the range 0..15.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions require an argument representable as
     unsigned char (or EOF); a plain char may be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
442
234b45d4 443\f
6c7a06a3 444/* Public character management functions. */
234b45d4 445
6c7a06a3 446/* A cleanup function which is run to close an iconv descriptor. */
234b45d4 447
6c7a06a3
TT
448static void
449cleanup_iconv (void *p)
234b45d4 450{
6c7a06a3
TT
451 iconv_t *descp = p;
452 iconv_close (*descp);
234b45d4
KB
453}
454
6c7a06a3
TT
455void
456convert_between_encodings (const char *from, const char *to,
457 const gdb_byte *bytes, unsigned int num_bytes,
458 int width, struct obstack *output,
459 enum transliterations translit)
460{
461 iconv_t desc;
462 struct cleanup *cleanups;
463 size_t inleft;
464 char *inp;
465 unsigned int space_request;
466
467 /* Often, the host and target charsets will be the same. */
468 if (!strcmp (from, to))
469 {
470 obstack_grow (output, bytes, num_bytes);
471 return;
472 }
234b45d4 473
6c7a06a3
TT
474 desc = iconv_open (to, from);
475 if (desc == (iconv_t) -1)
476 perror_with_name ("Converting character sets");
477 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 478
6c7a06a3
TT
479 inleft = num_bytes;
480 inp = (char *) bytes;
234b45d4 481
6c7a06a3 482 space_request = num_bytes;
234b45d4 483
6c7a06a3 484 while (inleft > 0)
234b45d4 485 {
6c7a06a3
TT
486 char *outp;
487 size_t outleft, r;
488 int old_size;
489
490 old_size = obstack_object_size (output);
491 obstack_blank (output, space_request);
492
493 outp = obstack_base (output) + old_size;
494 outleft = space_request;
495
0dd7fb99 496 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
497
498 /* Now make sure that the object on the obstack only includes
499 bytes we have converted. */
500 obstack_blank (output, - (int) outleft);
501
502 if (r == (size_t) -1)
503 {
504 switch (errno)
505 {
506 case EILSEQ:
507 {
508 int i;
509
510 /* Invalid input sequence. */
511 if (translit == translit_none)
512 error (_("Could not convert character to `%s' character set"),
513 to);
514
515 /* We emit escape sequence for the bytes, skip them,
516 and try again. */
517 for (i = 0; i < width; ++i)
518 {
519 char octal[5];
520
521 sprintf (octal, "\\%.3o", *inp & 0xff);
522 obstack_grow_str (output, octal);
523
524 ++inp;
525 --inleft;
526 }
527 }
528 break;
529
530 case E2BIG:
531 /* We ran out of space in the output buffer. Make it
532 bigger next time around. */
533 space_request *= 2;
534 break;
535
536 case EINVAL:
537 /* Incomplete input sequence. FIXME: ought to report this
538 to the caller somehow. */
539 inleft = 0;
540 break;
541
542 default:
543 perror_with_name ("Internal error while converting character sets");
544 }
545 }
234b45d4 546 }
234b45d4 547
6c7a06a3 548 do_cleanups (cleanups);
234b45d4
KB
549}
550
e33d66ec 551\f
e33d66ec 552
6c7a06a3
TT
553/* An iterator that returns host wchar_t's from a target string. */
554struct wchar_iterator
e33d66ec 555{
6c7a06a3
TT
556 /* The underlying iconv descriptor. */
557 iconv_t desc;
e33d66ec 558
6c7a06a3
TT
559 /* The input string. This is updated as convert characters. */
560 char *input;
561 /* The number of bytes remaining in the input. */
562 size_t bytes;
e33d66ec 563
6c7a06a3
TT
564 /* The width of an input character. */
565 size_t width;
e33d66ec 566
6c7a06a3
TT
567 /* The output buffer and its size. */
568 gdb_wchar_t *out;
569 size_t out_size;
570};
234b45d4 571
6c7a06a3
TT
572/* Create a new iterator. */
573struct wchar_iterator *
574make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
575 size_t width)
234b45d4 576{
6c7a06a3
TT
577 struct wchar_iterator *result;
578 iconv_t desc;
234b45d4 579
732f6a93 580 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
581 if (desc == (iconv_t) -1)
582 perror_with_name ("Converting character sets");
234b45d4 583
6c7a06a3
TT
584 result = XNEW (struct wchar_iterator);
585 result->desc = desc;
586 result->input = (char *) input;
587 result->bytes = bytes;
588 result->width = width;
234b45d4 589
6c7a06a3
TT
590 result->out = XNEW (gdb_wchar_t);
591 result->out_size = 1;
234b45d4 592
6c7a06a3 593 return result;
e33d66ec 594}
234b45d4 595
e33d66ec 596static void
6c7a06a3 597do_cleanup_iterator (void *p)
e33d66ec 598{
6c7a06a3 599 struct wchar_iterator *iter = p;
234b45d4 600
6c7a06a3
TT
601 iconv_close (iter->desc);
602 xfree (iter->out);
603 xfree (iter);
234b45d4
KB
604}
605
6c7a06a3
TT
606struct cleanup *
607make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 608{
6c7a06a3 609 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 610}
234b45d4 611
6c7a06a3
TT
612int
613wchar_iterate (struct wchar_iterator *iter,
614 enum wchar_iterate_result *out_result,
615 gdb_wchar_t **out_chars,
616 const gdb_byte **ptr,
617 size_t *len)
618{
619 size_t out_request;
620
621 /* Try to convert some characters. At first we try to convert just
622 a single character. The reason for this is that iconv does not
623 necessarily update its outgoing arguments when it encounters an
624 invalid input sequence -- but we want to reliably report this to
625 our caller so it can emit an escape sequence. */
626 out_request = 1;
627 while (iter->bytes > 0)
e33d66ec 628 {
6c7a06a3
TT
629 char *outptr = (char *) &iter->out[0];
630 char *orig_inptr = iter->input;
631 size_t orig_in = iter->bytes;
632 size_t out_avail = out_request * sizeof (gdb_wchar_t);
633 size_t num;
634 gdb_wchar_t result;
635
0dd7fb99
TT
636 size_t r = iconv (iter->desc,
637 (ICONV_CONST char **) &iter->input, &iter->bytes,
6c7a06a3
TT
638 &outptr, &out_avail);
639 if (r == (size_t) -1)
640 {
641 switch (errno)
642 {
643 case EILSEQ:
644 /* Invalid input sequence. Skip it, and let the caller
645 know about it. */
646 *out_result = wchar_iterate_invalid;
647 *ptr = iter->input;
648 *len = iter->width;
649 iter->input += iter->width;
650 iter->bytes -= iter->width;
651 return 0;
652
653 case E2BIG:
654 /* We ran out of space. We still might have converted a
655 character; if so, return it. Otherwise, grow the
656 buffer and try again. */
657 if (out_avail < out_request * sizeof (gdb_wchar_t))
658 break;
659
660 ++out_request;
661 if (out_request > iter->out_size)
662 {
663 iter->out_size = out_request;
664 iter->out = xrealloc (iter->out,
665 out_request * sizeof (gdb_wchar_t));
666 }
667 continue;
668
669 case EINVAL:
670 /* Incomplete input sequence. Let the caller know, and
671 arrange for future calls to see EOF. */
672 *out_result = wchar_iterate_incomplete;
673 *ptr = iter->input;
674 *len = iter->bytes;
675 iter->bytes = 0;
676 return 0;
677
678 default:
679 perror_with_name ("Internal error while converting character sets");
680 }
681 }
682
683 /* We converted something. */
684 num = out_request - out_avail / sizeof (gdb_wchar_t);
685 *out_result = wchar_iterate_ok;
686 *out_chars = iter->out;
687 *ptr = orig_inptr;
688 *len = orig_in - iter->bytes;
689 return num;
e33d66ec 690 }
6c7a06a3
TT
691
692 /* Really done. */
693 *out_result = wchar_iterate_eof;
694 return -1;
234b45d4
KB
695}
696
e33d66ec 697\f
6c7a06a3 698/* The charset.c module initialization function. */
234b45d4 699
6c7a06a3 700extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 701
6c7a06a3
TT
702typedef char *char_ptr;
703DEF_VEC_P (char_ptr);
234b45d4 704
6c7a06a3 705static VEC (char_ptr) *charsets;
234b45d4 706
6c7a06a3 707#ifdef PHONY_ICONV
234b45d4 708
6c7a06a3
TT
709static void
710find_charset_names (void)
234b45d4 711{
6c7a06a3
TT
712 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
713 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
714}
715
6c7a06a3 716#else /* PHONY_ICONV */
fc3b640d
TT
717
718/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
719 provides different symbols in the static and dynamic libraries.
720 So, configure may see libiconvlist but not iconvlist. But, calling
721 iconvlist is the right thing to do and will work. Hence we do a
722 check here but unconditionally call iconvlist below. */
723#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 724
6c7a06a3
TT
725/* A helper function that adds some character sets to the vector of
726 all character sets. This is a callback function for iconvlist. */
727
728static int
729add_one (unsigned int count, const char *const *names, void *data)
234b45d4 730{
6c7a06a3 731 unsigned int i;
234b45d4 732
6c7a06a3
TT
733 for (i = 0; i < count; ++i)
734 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 735
6c7a06a3 736 return 0;
234b45d4
KB
737}
738
6c7a06a3
TT
739static void
740find_charset_names (void)
234b45d4 741{
6c7a06a3
TT
742 iconvlist (add_one, NULL);
743 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
744}
745
6c7a06a3 746#else
234b45d4 747
40b5c9fb
DE
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = intro_fragments; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
776
6c7a06a3
TT
777static void
778find_charset_names (void)
234b45d4 779{
732f6a93
TT
780 struct pex_obj *child;
781 char *args[3];
782 int err, status;
783 int fail = 1;
40b5c9fb
DE
784 struct gdb_environ *iconv_env;
785
786 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
787 a tty. We need to recognize it and ignore it. This text is subject
788 to translation, so force LANGUAGE=C. */
789 iconv_env = make_environ ();
790 init_environ (iconv_env);
791 set_in_environ (iconv_env, "LANGUAGE", "C");
792 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93
TT
793
794 child = pex_init (0, "iconv", NULL);
795
796 args[0] = "iconv";
797 args[1] = "-l";
798 args[2] = NULL;
799 /* Note that we simply ignore errors here. */
40b5c9fb
DE
800 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
801 "iconv", args, environ_vector (iconv_env),
802 NULL, NULL, &err))
732f6a93
TT
803 {
804 FILE *in = pex_read_output (child, 0);
805
806 /* POSIX says that iconv -l uses an unspecified format. We
807 parse the glibc and libiconv formats; feel free to add others
808 as needed. */
40b5c9fb 809
732f6a93
TT
810 while (!feof (in))
811 {
812 /* The size of buf is chosen arbitrarily. */
813 char buf[1024];
814 char *start, *r;
815 int len, keep_going;
816
817 r = fgets (buf, sizeof (buf), in);
818 if (!r)
819 break;
820 len = strlen (r);
821 if (len <= 3)
822 continue;
40b5c9fb
DE
823 if (ignore_line_p (r))
824 continue;
825
732f6a93
TT
826 /* Strip off the newline. */
827 --len;
828 /* Strip off one or two '/'s. glibc will print lines like
829 "8859_7//", but also "10646-1:1993/UCS4/". */
830 if (buf[len - 1] == '/')
831 --len;
832 if (buf[len - 1] == '/')
833 --len;
834 buf[len] = '\0';
835
836 /* libiconv will print multiple entries per line, separated
40b5c9fb
DE
837 by spaces. Older iconvs will print multiple entries per line,
838 indented by two spaces, and separated by ", "
839 (i.e. the human readable form). */
732f6a93
TT
840 start = buf;
841 while (1)
842 {
843 int keep_going;
844 char *p;
845
40b5c9fb
DE
846 /* Skip leading blanks. */
847 for (p = start; *p && *p == ' '; ++p)
848 ;
849 start = p;
850 /* Find the next space, comma, or end-of-line. */
851 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
852 ;
853 /* Ignore an empty result. */
854 if (p == start)
855 break;
856 keep_going = *p;
857 *p = '\0';
858 VEC_safe_push (char_ptr, charsets, xstrdup (start));
859 if (!keep_going)
860 break;
861 /* Skip any extra spaces. */
862 for (start = p + 1; *start && *start == ' '; ++start)
863 ;
864 }
865 }
234b45d4 866
732f6a93
TT
867 if (pex_get_status (child, 1, &status)
868 && WIFEXITED (status) && !WEXITSTATUS (status))
869 fail = 0;
234b45d4 870
6c7a06a3 871 }
234b45d4 872
732f6a93 873 pex_free (child);
40b5c9fb 874 free_environ (iconv_env);
234b45d4 875
732f6a93
TT
876 if (fail)
877 {
878 /* Some error occurred, so drop the vector. */
879 int ix;
880 char *elt;
881 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
882 xfree (elt);
883 VEC_truncate (char_ptr, charsets, 0);
884 }
885 else
886 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 887}
234b45d4 888
fc3b640d 889#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 890#endif /* PHONY_ICONV */
234b45d4 891
f870a310
TT
892/* The "auto" target charset used by default_auto_charset. */
893static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
894
895const char *
896default_auto_charset (void)
897{
898 return auto_target_charset_name;
899}
900
901const char *
902default_auto_wide_charset (void)
903{
904 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
905}
906
234b45d4
KB
907void
908_initialize_charset (void)
909{
e33d66ec
EZ
910 struct cmd_list_element *new_cmd;
911
f870a310 912 /* The first element is always "auto". */
732f6a93 913 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
914 find_charset_names ();
915
916 if (VEC_length (char_ptr, charsets) > 1)
917 charset_enum = (const char **) VEC_address (char_ptr, charsets);
918 else
919 charset_enum = default_charset_names;
920
921#ifndef PHONY_ICONV
922#ifdef HAVE_LANGINFO_CODESET
f870a310
TT
923 /* The result of nl_langinfo may be overwritten later. This may
924 leak a little memory, if the user later changes the host charset,
925 but that doesn't matter much. */
926 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
58720494 927 /* Solaris will return `646' here -- but the Solaris iconv then
06be6983
TG
928 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
929 which GNU libiconv doesn't like (infinite loop). */
930 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 931 auto_host_charset_name = "ASCII";
f870a310
TT
932 auto_target_charset_name = auto_host_charset_name;
933#elif defined (USE_WIN32API)
934 {
935 static w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
936
937 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
938 "CP%d", GetACP());
939 auto_host_charset_name = w32_host_default_charset;
940 auto_target_charset_name = auto_host_charset_name;
941 }
6c7a06a3
TT
942#endif
943#endif
e33d66ec 944
7ab04401 945 add_setshow_enum_cmd ("charset", class_support,
f870a310 946 charset_enum, &host_charset_name, _("\
7ab04401
AC
947Set the host and target character sets."), _("\
948Show the host and target character sets."), _("\
3d263c1d
BI
949The `host character set' is the one used by the system GDB is running on.\n\
950The `target character set' is the one used by the program being debugged.\n\
951You may only use supersets of ASCII for your host character set; GDB does\n\
952not support any others.\n\
953To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
954 /* Note that the sfunc below needs to set
955 target_charset_name, because the 'set
956 charset' command sets two variables. */
957 set_charset_sfunc,
958 show_charset,
959 &setlist, &showlist);
960
961 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 962 charset_enum, &host_charset_name, _("\
7ab04401
AC
963Set the host character set."), _("\
964Show the host character set."), _("\
3d263c1d
BI
965The `host character set' is the one used by the system GDB is running on.\n\
966You may only use supersets of ASCII for your host character set; GDB does\n\
967not support any others.\n\
968To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 969 set_host_charset_sfunc,
920d2a44 970 show_host_charset_name,
7ab04401
AC
971 &setlist, &showlist);
972
973 add_setshow_enum_cmd ("target-charset", class_support,
f870a310 974 charset_enum, &target_charset_name, _("\
7ab04401
AC
975Set the target character set."), _("\
976Show the target character set."), _("\
3d263c1d
BI
977The `target character set' is the one used by the program being debugged.\n\
978GDB translates characters and strings between the host and target\n\
979character sets as needed.\n\
980To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 981 set_target_charset_sfunc,
920d2a44 982 show_target_charset_name,
7ab04401 983 &setlist, &showlist);
6c7a06a3
TT
984
985 add_setshow_enum_cmd ("target-wide-charset", class_support,
f870a310 986 charset_enum, &target_wide_charset_name,
6c7a06a3
TT
987 _("\
988Set the target wide character set."), _("\
989Show the target wide character set."), _("\
990The `target wide character set' is the one used by the program being debugged.\n\
991In particular it is the encoding used by `wchar_t'.\n\
992GDB translates characters and strings between the host and target\n\
993character sets as needed.\n\
994To see a list of the character sets GDB supports, type\n\
995`set target-wide-charset'<TAB>"),
996 set_target_wide_charset_sfunc,
997 show_target_wide_charset_name,
998 &setlist, &showlist);
234b45d4 999}
This page took 0.613627 seconds and 4 git commands to generate.