* ld-undefined/undefined.exp (testline): XFAIL hppa*64*-*-*.
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
4c38e0a4
JB
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
234b45d4
KB
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
a9762ec7 10 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
a9762ec7 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
20
21#include "defs.h"
22#include "charset.h"
23#include "gdbcmd.h"
24#include "gdb_assert.h"
6c7a06a3 25#include "gdb_obstack.h"
732f6a93 26#include "gdb_wait.h"
6c7a06a3
TT
27#include "charset-list.h"
28#include "vec.h"
40b5c9fb 29#include "environ.h"
f870a310 30#include "arch-utils.h"
234b45d4
KB
31
32#include <stddef.h>
4ef3f3be 33#include "gdb_string.h"
234b45d4
KB
34#include <ctype.h>
35
43484f03
DJ
36#ifdef USE_WIN32API
37#include <windows.h>
38#endif
234b45d4
KB
39\f
40/* How GDB's character set support works
41
6c7a06a3 42 GDB has three global settings:
234b45d4
KB
43
44 - The `current host character set' is the character set GDB should
45 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
46 terminal knows how to display properly. Most users should not
47 change this.
234b45d4
KB
48
49 - The `current target character set' is the character set the
50 program being debugged uses.
51
6c7a06a3
TT
52 - The `current target wide character set' is the wide character set
53 the program being debugged uses, that is, the encoding used for
54 wchar_t.
55
234b45d4
KB
56 There are commands to set each of these, and mechanisms for
57 choosing reasonable default values. GDB has a global list of
58 character sets that it can use as its host or target character
59 sets.
60
61 The header file `charset.h' declares various functions that
62 different pieces of GDB need to perform tasks like:
63
64 - printing target strings and characters to the user's terminal
65 (mostly target->host conversions),
66
67 - building target-appropriate representations of strings and
68 characters the user enters in expressions (mostly host->target
69 conversions),
70
6c7a06a3
TT
71 and so on.
72
73 To avoid excessive code duplication and maintenance efforts,
74 GDB simply requires a capable iconv function. Users on platforms
75 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
76
77\f
6c7a06a3 78#ifdef PHONY_ICONV
234b45d4 79
6c7a06a3
TT
80/* Provide a phony iconv that does as little as possible. Also,
81 arrange for there to be a single available character set. */
234b45d4 82
6c7a06a3
TT
83#undef GDB_DEFAULT_HOST_CHARSET
84#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
85#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
86#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
87#undef DEFAULT_CHARSET_NAMES
88#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
89
90#undef iconv_t
91#define iconv_t int
92#undef iconv_open
62234ccc 93#define iconv_open phony_iconv_open
6c7a06a3 94#undef iconv
62234ccc 95#define iconv phony_iconv
6c7a06a3 96#undef iconv_close
62234ccc 97#define iconv_close phony_iconv_close
6c7a06a3 98
0dd7fb99
TT
99#undef ICONV_CONST
100#define ICONV_CONST const
101
76208fec 102/* Some systems don't have EILSEQ, so we define it here, but not as
e726d784
EZ
103 EINVAL, because callers of `iconv' want to distinguish EINVAL and
104 EILSEQ. This is what iconv.h from libiconv does as well. Note
105 that wchar.h may also define EILSEQ, so this needs to be after we
106 include wchar.h, which happens in defs.h through gdb_wchar.h. */
107#ifndef EILSEQ
108#define EILSEQ ENOENT
109#endif
110
6c7a06a3 111iconv_t
62234ccc 112phony_iconv_open (const char *to, const char *from)
6c7a06a3 113{
b8899f2b 114 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
6c7a06a3 115 We allow conversions to wchar_t and the host charset. */
b8899f2b 116 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
6c7a06a3
TT
117 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
118 return -1;
119 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
120 return -1;
234b45d4 121
b8899f2b 122 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
6c7a06a3 123 used as a flag in calls to iconv. */
b8899f2b 124 return !strcmp (from, "UTF-32BE");
6c7a06a3 125}
234b45d4 126
/* Close a phony descriptor.  A phony descriptor holds no resources,
   so this always reports success by returning 0.  */

int
phony_iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
234b45d4 132
/* Phony replacement for iconv.

   UTF_FLAG is the value returned by phony_iconv_open: non-zero means
   the input is UTF-32BE, zero means bytes are copied through
   unchanged.  *INBUF / *INBYTESLEFT describe the remaining input and
   *OUTBUF / *OUTBYTESLEFT the remaining output space; all four are
   advanced past whatever was consumed or produced.

   Returns 0 when all input was converted.  Otherwise returns
   (size_t) -1 with errno set to:
     EILSEQ - a UTF-32BE code point is >= 256 and so has no
              single-byte representation;
     E2BIG  - the output buffer is full;
     EINVAL - the UTF-32BE input ends with a partial (< 4 byte)
              unit.  */

size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return -1;
            }
          /* Never write past the end of the output buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return -1;
            }
          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* A whole 4-byte unit was consumed.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }
      /* Only leftover bytes (1 to 3 of them) are an incomplete
         sequence; an exactly consumed input is a success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      if (*inbytesleft)
        {
          errno = E2BIG;
          return -1;
        }
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 193
6c7a06a3 194#endif
234b45d4
KB
195
196
197\f
198/* The global lists of character sets and translations. */
199
200
e33d66ec
EZ
201#ifndef GDB_DEFAULT_TARGET_CHARSET
202#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
203#endif
204
6c7a06a3 205#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 206#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
207#endif
208
209static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
210static const char *host_charset_name = "auto";
920d2a44
AC
211static void
212show_host_charset_name (struct ui_file *file, int from_tty,
213 struct cmd_list_element *c,
214 const char *value)
215{
6c7a06a3
TT
216 if (!strcmp (value, "auto"))
217 fprintf_filtered (file,
218 _("The host character set is \"auto; currently %s\".\n"),
219 auto_host_charset_name);
220 else
221 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
222}
223
/* The target charset setting; "auto" defers to the architecture.  */
static const char *target_charset_name = "auto";

/* Print the target character set VALUE to FILE.  For "auto", the
   charset reported by the current architecture is printed as well.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
238
/* The target wide charset setting; "auto" defers to the
   architecture.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide character set VALUE to FILE.  For "auto",
   the wide charset reported by the current architecture is printed
   as well.  */

static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
e33d66ec 253
/* NULL-terminated fallback list of charset names.
   _initialize_charset uses it when find_charset_names leaves the
   `charsets' vector with at most one element.  */
static const char *default_charset_names[] =
{
  DEFAULT_CHARSET_NAMES
  0
};

/* The NULL-terminated list of charset names handed to the set/show
   enum commands; assigned in _initialize_charset.  */
static const char **charset_enum;
234b45d4 261
6c7a06a3
TT
262\f
263/* If the target wide character set has big- or little-endian
264 variants, these are the corresponding names. */
265static const char *target_wide_charset_be_name;
266static const char *target_wide_charset_le_name;
234b45d4 267
f870a310
TT
268/* The architecture for which the BE- and LE-names are valid. */
269static struct gdbarch *be_le_arch;
270
271/* A helper function which sets the target wide big- and little-endian
272 character set names, if possible. */
234b45d4 273
6c7a06a3 274static void
f870a310 275set_be_le_names (struct gdbarch *gdbarch)
234b45d4 276{
6c7a06a3 277 int i, len;
f870a310
TT
278 const char *target_wide;
279
280 if (be_le_arch == gdbarch)
281 return;
282 be_le_arch = gdbarch;
234b45d4 283
6c7a06a3
TT
284 target_wide_charset_le_name = NULL;
285 target_wide_charset_be_name = NULL;
234b45d4 286
f870a310
TT
287 target_wide = target_wide_charset_name;
288 if (!strcmp (target_wide, "auto"))
289 target_wide = gdbarch_auto_wide_charset (gdbarch);
290
291 len = strlen (target_wide);
6c7a06a3
TT
292 for (i = 0; charset_enum[i]; ++i)
293 {
f870a310 294 if (strncmp (target_wide, charset_enum[i], len))
6c7a06a3
TT
295 continue;
296 if ((charset_enum[i][len] == 'B'
297 || charset_enum[i][len] == 'L')
298 && charset_enum[i][len + 1] == 'E'
299 && charset_enum[i][len + 2] == '\0')
300 {
301 if (charset_enum[i][len] == 'B')
302 target_wide_charset_be_name = charset_enum[i];
303 else
304 target_wide_charset_le_name = charset_enum[i];
305 }
306 }
234b45d4
KB
307}
308
6c7a06a3
TT
309/* 'Set charset', 'set host-charset', 'set target-charset', 'set
310 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
311
312static void
f870a310 313validate (struct gdbarch *gdbarch)
234b45d4 314{
6c7a06a3
TT
315 iconv_t desc;
316 const char *host_cset = host_charset ();
f870a310
TT
317 const char *target_cset = target_charset (gdbarch);
318 const char *target_wide_cset = target_wide_charset_name;
c5504eaf 319
f870a310
TT
320 if (!strcmp (target_wide_cset, "auto"))
321 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
234b45d4 322
f870a310 323 desc = iconv_open (target_wide_cset, host_cset);
6c7a06a3
TT
324 if (desc == (iconv_t) -1)
325 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 326 target_wide_cset, host_cset);
6c7a06a3 327 iconv_close (desc);
234b45d4 328
f870a310 329 desc = iconv_open (target_cset, host_cset);
6c7a06a3
TT
330 if (desc == (iconv_t) -1)
331 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 332 target_cset, host_cset);
6c7a06a3 333 iconv_close (desc);
234b45d4 334
f870a310
TT
335 /* Clear the cache. */
336 be_le_arch = NULL;
234b45d4
KB
337}
338
6c7a06a3
TT
339/* This is the sfunc for the 'set charset' command. */
340static void
341set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 342{
6c7a06a3
TT
343 /* CAREFUL: set the target charset here as well. */
344 target_charset_name = host_charset_name;
f870a310 345 validate (get_current_arch ());
234b45d4
KB
346}
347
6c7a06a3
TT
/* The sfunc for 'set host-charset'.  This is only a wrapper around
   validate, needed because an sfunc has a fixed signature.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
356
6c7a06a3
TT
/* The sfunc for 'set target-charset'; validates the new setting
   against the current architecture.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
364
6c7a06a3
TT
/* The sfunc for 'set target-wide-charset'; validates the new setting
   against the current architecture.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
372
6c7a06a3
TT
/* sfunc for the 'show charset' command.  Prints the host, target and
   target wide character set settings, in that order, to FILE.  NAME
   is not used; the current values of the three settings are shown
   instead.  */
static void
show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
              const char *name)
{
  show_host_charset_name (file, from_tty, c, host_charset_name);
  show_target_charset_name (file, from_tty, c, target_charset_name);
  show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
}
382
234b45d4 383\f
6c7a06a3 384/* Accessor functions. */
234b45d4 385
6c7a06a3
TT
386const char *
387host_charset (void)
234b45d4 388{
6c7a06a3
TT
389 if (!strcmp (host_charset_name, "auto"))
390 return auto_host_charset_name;
391 return host_charset_name;
234b45d4
KB
392}
393
6c7a06a3 394const char *
f870a310 395target_charset (struct gdbarch *gdbarch)
234b45d4 396{
f870a310
TT
397 if (!strcmp (target_charset_name, "auto"))
398 return gdbarch_auto_charset (gdbarch);
6c7a06a3 399 return target_charset_name;
234b45d4 400}
234b45d4 401
6c7a06a3 402const char *
f870a310 403target_wide_charset (struct gdbarch *gdbarch)
234b45d4 404{
f870a310
TT
405 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
406
407 set_be_le_names (gdbarch);
e17a4113 408 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 409 {
6c7a06a3
TT
410 if (target_wide_charset_be_name)
411 return target_wide_charset_be_name;
234b45d4 412 }
6c7a06a3 413 else
234b45d4 414 {
6c7a06a3
TT
415 if (target_wide_charset_le_name)
416 return target_wide_charset_le_name;
234b45d4
KB
417 }
418
f870a310
TT
419 if (!strcmp (target_wide_charset_name, "auto"))
420 return gdbarch_auto_wide_charset (gdbarch);
421
6c7a06a3 422 return target_wide_charset_name;
234b45d4
KB
423}
424
234b45d4 425\f
6c7a06a3
TT
426/* Host character set management. For the time being, we assume that
427 the host character set is some superset of ASCII. */
234b45d4 428
6c7a06a3
TT
/* Map the host character C to its control character: '?' becomes
   DEL (0177); any other character is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
436
6c7a06a3
TT
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; any character that is not a decimal
   digit or a letter in a-f / A-F trips the assertion.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for EOF and values
     representable as unsigned char, so never pass a plain char that
     may be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
450
234b45d4 451\f
6c7a06a3 452/* Public character management functions. */
234b45d4 453
/* Cleanup callback: P points to an iconv_t descriptor, which is
   closed.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
462
6c7a06a3
TT
463void
464convert_between_encodings (const char *from, const char *to,
465 const gdb_byte *bytes, unsigned int num_bytes,
466 int width, struct obstack *output,
467 enum transliterations translit)
468{
469 iconv_t desc;
470 struct cleanup *cleanups;
471 size_t inleft;
472 char *inp;
473 unsigned int space_request;
474
475 /* Often, the host and target charsets will be the same. */
476 if (!strcmp (from, to))
477 {
478 obstack_grow (output, bytes, num_bytes);
479 return;
480 }
234b45d4 481
6c7a06a3
TT
482 desc = iconv_open (to, from);
483 if (desc == (iconv_t) -1)
484 perror_with_name ("Converting character sets");
485 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 486
6c7a06a3
TT
487 inleft = num_bytes;
488 inp = (char *) bytes;
234b45d4 489
6c7a06a3 490 space_request = num_bytes;
234b45d4 491
6c7a06a3 492 while (inleft > 0)
234b45d4 493 {
6c7a06a3
TT
494 char *outp;
495 size_t outleft, r;
496 int old_size;
497
498 old_size = obstack_object_size (output);
499 obstack_blank (output, space_request);
500
501 outp = obstack_base (output) + old_size;
502 outleft = space_request;
503
0dd7fb99 504 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
505
506 /* Now make sure that the object on the obstack only includes
507 bytes we have converted. */
508 obstack_blank (output, - (int) outleft);
509
510 if (r == (size_t) -1)
511 {
512 switch (errno)
513 {
514 case EILSEQ:
515 {
516 int i;
517
518 /* Invalid input sequence. */
519 if (translit == translit_none)
520 error (_("Could not convert character to `%s' character set"),
521 to);
522
523 /* We emit escape sequence for the bytes, skip them,
524 and try again. */
525 for (i = 0; i < width; ++i)
526 {
527 char octal[5];
528
529 sprintf (octal, "\\%.3o", *inp & 0xff);
530 obstack_grow_str (output, octal);
531
532 ++inp;
533 --inleft;
534 }
535 }
536 break;
537
538 case E2BIG:
539 /* We ran out of space in the output buffer. Make it
540 bigger next time around. */
541 space_request *= 2;
542 break;
543
544 case EINVAL:
545 /* Incomplete input sequence. FIXME: ought to report this
546 to the caller somehow. */
547 inleft = 0;
548 break;
549
550 default:
551 perror_with_name ("Internal error while converting character sets");
552 }
553 }
234b45d4 554 }
234b45d4 555
6c7a06a3 556 do_cleanups (cleanups);
234b45d4
KB
557}
558
e33d66ec 559\f
e33d66ec 560
6c7a06a3
TT
/* An iterator that returns host wchar_t's from a target string.
   Instances are created by make_wchar_iterator and released by
   do_cleanup_iterator.  */
struct wchar_iterator
{
  /* The underlying iconv descriptor.  */
  iconv_t desc;

  /* The input string.  This is updated as we convert characters.  */
  char *input;
  /* The number of bytes remaining in the input.  */
  size_t bytes;

  /* The width of an input character.  wchar_iterate skips this many
     bytes when it meets an invalid input sequence.  */
  size_t width;

  /* The output buffer and its size.  The size is counted in
     gdb_wchar_t elements, not bytes.  */
  gdb_wchar_t *out;
  size_t out_size;
};
234b45d4 579
6c7a06a3
TT
580/* Create a new iterator. */
581struct wchar_iterator *
582make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
583 size_t width)
234b45d4 584{
6c7a06a3
TT
585 struct wchar_iterator *result;
586 iconv_t desc;
234b45d4 587
732f6a93 588 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
589 if (desc == (iconv_t) -1)
590 perror_with_name ("Converting character sets");
234b45d4 591
6c7a06a3
TT
592 result = XNEW (struct wchar_iterator);
593 result->desc = desc;
594 result->input = (char *) input;
595 result->bytes = bytes;
596 result->width = width;
234b45d4 597
6c7a06a3
TT
598 result->out = XNEW (gdb_wchar_t);
599 result->out_size = 1;
234b45d4 600
6c7a06a3 601 return result;
e33d66ec 602}
234b45d4 603
e33d66ec 604static void
6c7a06a3 605do_cleanup_iterator (void *p)
e33d66ec 606{
6c7a06a3 607 struct wchar_iterator *iter = p;
234b45d4 608
6c7a06a3
TT
609 iconv_close (iter->desc);
610 xfree (iter->out);
611 xfree (iter);
234b45d4
KB
612}
613
6c7a06a3
TT
614struct cleanup *
615make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 616{
6c7a06a3 617 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 618}
234b45d4 619
6c7a06a3
TT
/* Convert the next piece of ITER's input.

   The return value and *OUT_RESULT describe the outcome:

   - A positive value N with wchar_iterate_ok: N characters were
     converted.  *OUT_CHARS points at them (in ITER's own buffer),
     *PTR is the input position the conversion started from and *LEN
     the number of input bytes consumed.

   - 0 with wchar_iterate_invalid: an invalid input sequence was
     found.  *PTR points at it and *LEN is ITER's character width;
     that many bytes have been skipped.

   - 0 with wchar_iterate_incomplete: the input ends in an incomplete
     sequence.  *PTR points at it and *LEN is the number of bytes
     left; ITER is then marked as exhausted.

   - -1 with wchar_iterate_eof: no input remains.  */

int
wchar_iterate (struct wchar_iterator *iter,
               enum wchar_iterate_result *out_result,
               gdb_wchar_t **out_chars,
               const gdb_byte **ptr,
               size_t *len)
{
  size_t out_request;

  /* Try to convert some characters.  At first we try to convert just
     a single character.  The reason for this is that iconv does not
     necessarily update its outgoing arguments when it encounters an
     invalid input sequence -- but we want to reliably report this to
     our caller so it can emit an escape sequence.  */
  out_request = 1;
  while (iter->bytes > 0)
    {
      char *outptr = (char *) &iter->out[0];
      char *orig_inptr = iter->input;
      size_t orig_in = iter->bytes;
      /* Output space offered to iconv, in bytes.  */
      size_t out_avail = out_request * sizeof (gdb_wchar_t);
      size_t num;
      size_t r = iconv (iter->desc,
                        (ICONV_CONST char **) &iter->input, &iter->bytes,
                        &outptr, &out_avail);

      if (r == (size_t) -1)
        {
          switch (errno)
            {
            case EILSEQ:
              /* Invalid input sequence.  We still might have converted a
                 character; if so, return it.  */
              if (out_avail < out_request * sizeof (gdb_wchar_t))
                break;

              /* Otherwise skip the first invalid character, and let the
                 caller know about it.  */
              *out_result = wchar_iterate_invalid;
              *ptr = iter->input;
              *len = iter->width;
              iter->input += iter->width;
              iter->bytes -= iter->width;
              return 0;

            case E2BIG:
              /* We ran out of space.  We still might have converted a
                 character; if so, return it.  Otherwise, grow the
                 buffer and try again.  */
              if (out_avail < out_request * sizeof (gdb_wchar_t))
                break;

              ++out_request;
              if (out_request > iter->out_size)
                {
                  iter->out_size = out_request;
                  iter->out = xrealloc (iter->out,
                                        out_request * sizeof (gdb_wchar_t));
                }
              /* Retry the conversion with the larger request.  */
              continue;

            case EINVAL:
              /* Incomplete input sequence.  Let the caller know, and
                 arrange for future calls to see EOF.  */
              *out_result = wchar_iterate_incomplete;
              *ptr = iter->input;
              *len = iter->bytes;
              iter->bytes = 0;
              return 0;

            default:
              perror_with_name ("Internal error while converting character sets");
            }
        }

      /* We converted something.  The count is the number of requested
         elements minus the number still unused in the buffer.  */
      num = out_request - out_avail / sizeof (gdb_wchar_t);
      *out_result = wchar_iterate_ok;
      *out_chars = iter->out;
      *ptr = orig_inptr;
      *len = orig_in - iter->bytes;
      return num;
    }

  /* Really done.  */
  *out_result = wchar_iterate_eof;
  return -1;
}
708
e33d66ec 709\f
6c7a06a3 710/* The charset.c module initialization function. */
234b45d4 711
6c7a06a3 712extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 713
6c7a06a3 714DEF_VEC_P (char_ptr);
234b45d4 715
6c7a06a3 716static VEC (char_ptr) *charsets;
234b45d4 717
6c7a06a3 718#ifdef PHONY_ICONV
234b45d4 719
6c7a06a3
TT
/* PHONY_ICONV variant: the only available charset is the default
   host charset.  Append it, followed by the terminating NULL, to the
   `charsets' vector.  */

static void
find_charset_names (void)
{
  VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
  VEC_safe_push (char_ptr, charsets, NULL);
}
726
6c7a06a3 727#else /* PHONY_ICONV */
fc3b640d
TT
728
729/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
730 provides different symbols in the static and dynamic libraries.
731 So, configure may see libiconvlist but not iconvlist. But, calling
732 iconvlist is the right thing to do and will work. Hence we do a
733 check here but unconditionally call iconvlist below. */
734#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 735
6c7a06a3
TT
736/* A helper function that adds some character sets to the vector of
737 all character sets. This is a callback function for iconvlist. */
738
739static int
740add_one (unsigned int count, const char *const *names, void *data)
234b45d4 741{
6c7a06a3 742 unsigned int i;
234b45d4 743
6c7a06a3
TT
744 for (i = 0; i < count; ++i)
745 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 746
6c7a06a3 747 return 0;
234b45d4
KB
748}
749
6c7a06a3
TT
/* iconvlist variant: let iconvlist pass every charset name to
   add_one, which appends them to the `charsets' vector, then
   terminate the vector with NULL.  */

static void
find_charset_names (void)
{
  iconvlist (add_one, NULL);
  VEC_safe_push (char_ptr, charsets, NULL);
}
756
6c7a06a3 757#else
234b45d4 758
40b5c9fb
DE
/* Return non-zero if LINE, a line of `iconv -l' output, belongs to
   the human readable introduction rather than to the charset list.
   Older iconv programs (e.g. 2.2.2) print that introduction even when
   stdout is not a tty; newer versions omit it in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = ignore_lines; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
787
6c7a06a3
TT
788static void
789find_charset_names (void)
234b45d4 790{
732f6a93
TT
791 struct pex_obj *child;
792 char *args[3];
793 int err, status;
794 int fail = 1;
40b5c9fb
DE
795 struct gdb_environ *iconv_env;
796
797 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
798 a tty. We need to recognize it and ignore it. This text is subject
799 to translation, so force LANGUAGE=C. */
800 iconv_env = make_environ ();
801 init_environ (iconv_env);
802 set_in_environ (iconv_env, "LANGUAGE", "C");
803 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93 804
40618926 805 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
732f6a93
TT
806
807 args[0] = "iconv";
808 args[1] = "-l";
809 args[2] = NULL;
810 /* Note that we simply ignore errors here. */
40b5c9fb
DE
811 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
812 "iconv", args, environ_vector (iconv_env),
813 NULL, NULL, &err))
732f6a93
TT
814 {
815 FILE *in = pex_read_output (child, 0);
816
817 /* POSIX says that iconv -l uses an unspecified format. We
818 parse the glibc and libiconv formats; feel free to add others
819 as needed. */
40b5c9fb 820
732f6a93
TT
821 while (!feof (in))
822 {
823 /* The size of buf is chosen arbitrarily. */
824 char buf[1024];
825 char *start, *r;
8ea13695 826 int len;
732f6a93
TT
827
828 r = fgets (buf, sizeof (buf), in);
829 if (!r)
830 break;
831 len = strlen (r);
832 if (len <= 3)
833 continue;
40b5c9fb
DE
834 if (ignore_line_p (r))
835 continue;
836
732f6a93
TT
837 /* Strip off the newline. */
838 --len;
839 /* Strip off one or two '/'s. glibc will print lines like
840 "8859_7//", but also "10646-1:1993/UCS4/". */
841 if (buf[len - 1] == '/')
842 --len;
843 if (buf[len - 1] == '/')
844 --len;
845 buf[len] = '\0';
846
847 /* libiconv will print multiple entries per line, separated
40b5c9fb
DE
848 by spaces. Older iconvs will print multiple entries per line,
849 indented by two spaces, and separated by ", "
850 (i.e. the human readable form). */
732f6a93
TT
851 start = buf;
852 while (1)
853 {
854 int keep_going;
855 char *p;
856
40b5c9fb
DE
857 /* Skip leading blanks. */
858 for (p = start; *p && *p == ' '; ++p)
859 ;
860 start = p;
861 /* Find the next space, comma, or end-of-line. */
862 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
863 ;
864 /* Ignore an empty result. */
865 if (p == start)
866 break;
867 keep_going = *p;
868 *p = '\0';
869 VEC_safe_push (char_ptr, charsets, xstrdup (start));
870 if (!keep_going)
871 break;
872 /* Skip any extra spaces. */
873 for (start = p + 1; *start && *start == ' '; ++start)
874 ;
875 }
876 }
234b45d4 877
732f6a93
TT
878 if (pex_get_status (child, 1, &status)
879 && WIFEXITED (status) && !WEXITSTATUS (status))
880 fail = 0;
234b45d4 881
6c7a06a3 882 }
234b45d4 883
732f6a93 884 pex_free (child);
40b5c9fb 885 free_environ (iconv_env);
234b45d4 886
732f6a93
TT
887 if (fail)
888 {
889 /* Some error occurred, so drop the vector. */
890 int ix;
891 char *elt;
892 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
893 xfree (elt);
894 VEC_truncate (char_ptr, charsets, 0);
895 }
896 else
897 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 898}
234b45d4 899
fc3b640d 900#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 901#endif /* PHONY_ICONV */
234b45d4 902
f870a310
TT
/* The "auto" target charset used by default_auto_charset.  It starts
   out as GDB_DEFAULT_TARGET_CHARSET; _initialize_charset may replace
   it with the automatically detected host charset.  */
static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;

/* Return the target charset to use when the setting is "auto".  */
const char *
default_auto_charset (void)
{
  return auto_target_charset_name;
}
911
/* Return the target wide charset to use when the setting is "auto";
   this is always GDB_DEFAULT_TARGET_WIDE_CHARSET.  */
const char *
default_auto_wide_charset (void)
{
  return GDB_DEFAULT_TARGET_WIDE_CHARSET;
}
917
234b45d4
KB
/* Module initialization.  Builds the list of known charset names,
   picks the automatic host and target charsets, and registers the
   `charset', `host-charset', `target-charset' and
   `target-wide-charset' set/show commands.  */
void
_initialize_charset (void)
{
  /* The first element is always "auto".  */
  VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
  find_charset_names ();

  /* A vector holding more than one element means find_charset_names
     added something; otherwise fall back to the built-in list.  */
  if (VEC_length (char_ptr, charsets) > 1)
    charset_enum = (const char **) VEC_address (char_ptr, charsets);
  else
    charset_enum = default_charset_names;

#ifndef PHONY_ICONV
#ifdef HAVE_LANGINFO_CODESET
  /* The result of nl_langinfo may be overwritten later.  This may
     leak a little memory, if the user later changes the host charset,
     but that doesn't matter much.  */
  auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
  /* Solaris will return `646' here -- but the Solaris iconv then
     does not accept this.  Darwin (and maybe FreeBSD) may return "" here,
     which GNU libiconv doesn't like (infinite loop).  */
  if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
    auto_host_charset_name = "ASCII";
  auto_target_charset_name = auto_host_charset_name;
#elif defined (USE_WIN32API)
  {
    static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia.  */

    /* Name the host charset after the code page number reported by
       GetACP.  */
    snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
              "CP%d", GetACP());
    auto_host_charset_name = w32_host_default_charset;
    auto_target_charset_name = auto_host_charset_name;
  }
#endif
#endif

  add_setshow_enum_cmd ("charset", class_support,
                        charset_enum, &host_charset_name, _("\
Set the host and target character sets."), _("\
Show the host and target character sets."), _("\
The `host character set' is the one used by the system GDB is running on.\n\
The `target character set' is the one used by the program being debugged.\n\
You may only use supersets of ASCII for your host character set; GDB does\n\
not support any others.\n\
To see a list of the character sets GDB supports, type `set charset <TAB>'."),
                        /* Note that the sfunc below needs to set
                           target_charset_name, because the 'set
                           charset' command sets two variables.  */
                        set_charset_sfunc,
                        show_charset,
                        &setlist, &showlist);

  add_setshow_enum_cmd ("host-charset", class_support,
                        charset_enum, &host_charset_name, _("\
Set the host character set."), _("\
Show the host character set."), _("\
The `host character set' is the one used by the system GDB is running on.\n\
You may only use supersets of ASCII for your host character set; GDB does\n\
not support any others.\n\
To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
                        set_host_charset_sfunc,
                        show_host_charset_name,
                        &setlist, &showlist);

  add_setshow_enum_cmd ("target-charset", class_support,
                        charset_enum, &target_charset_name, _("\
Set the target character set."), _("\
Show the target character set."), _("\
The `target character set' is the one used by the program being debugged.\n\
GDB translates characters and strings between the host and target\n\
character sets as needed.\n\
To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
                        set_target_charset_sfunc,
                        show_target_charset_name,
                        &setlist, &showlist);

  add_setshow_enum_cmd ("target-wide-charset", class_support,
                        charset_enum, &target_wide_charset_name,
                        _("\
Set the target wide character set."), _("\
Show the target wide character set."), _("\
The `target wide character set' is the one used by the program being debugged.\n\
In particular it is the encoding used by `wchar_t'.\n\
GDB translates characters and strings between the host and target\n\
character sets as needed.\n\
To see a list of the character sets GDB supports, type\n\
`set target-wide-charset'<TAB>"),
                        set_target_wide_charset_sfunc,
                        show_target_wide_charset_name,
                        &setlist, &showlist);
}
This page took 0.576738 seconds and 4 git commands to generate.