Fix intend
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
4c38e0a4
JB
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
234b45d4
KB
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
a9762ec7 10 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
a9762ec7 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
20
21#include "defs.h"
22#include "charset.h"
23#include "gdbcmd.h"
24#include "gdb_assert.h"
6c7a06a3 25#include "gdb_obstack.h"
732f6a93 26#include "gdb_wait.h"
6c7a06a3
TT
27#include "charset-list.h"
28#include "vec.h"
40b5c9fb 29#include "environ.h"
f870a310 30#include "arch-utils.h"
234b45d4
KB
31
32#include <stddef.h>
4ef3f3be 33#include "gdb_string.h"
234b45d4
KB
34#include <ctype.h>
35
43484f03
DJ
36#ifdef USE_WIN32API
37#include <windows.h>
38#endif
234b45d4
KB
39\f
40/* How GDB's character set support works
41
6c7a06a3 42 GDB has three global settings:
234b45d4
KB
43
44 - The `current host character set' is the character set GDB should
45 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
46 terminal knows how to display properly. Most users should not
47 change this.
234b45d4
KB
48
49 - The `current target character set' is the character set the
50 program being debugged uses.
51
6c7a06a3
TT
52 - The `current target wide character set' is the wide character set
53 the program being debugged uses, that is, the encoding used for
54 wchar_t.
55
234b45d4
KB
56 There are commands to set each of these, and mechanisms for
57 choosing reasonable default values. GDB has a global list of
58 character sets that it can use as its host or target character
59 sets.
60
61 The header file `charset.h' declares various functions that
62 different pieces of GDB need to perform tasks like:
63
64 - printing target strings and characters to the user's terminal
65 (mostly target->host conversions),
66
67 - building target-appropriate representations of strings and
68 characters the user enters in expressions (mostly host->target
69 conversions),
70
6c7a06a3
TT
71 and so on.
72
73 To avoid excessive code duplication and maintenance efforts,
74 GDB simply requires a capable iconv function. Users on platforms
75 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
76
77\f
6c7a06a3 78#ifdef PHONY_ICONV
234b45d4 79
6c7a06a3
TT
80/* Provide a phony iconv that does as little as possible. Also,
81 arrange for there to be a single available character set. */
234b45d4 82
6c7a06a3
TT
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* The phony descriptor is a plain int: 1 means "input is UTF-32BE",
   0 means "copy bytes through", and -1 is the failure value.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#undef iconv
#undef iconv_close

#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems don't have EILSEQ, so we define it here, but not as
   EINVAL, because callers of `iconv' want to distinguish EINVAL and
   EILSEQ.  This is what iconv.h from libiconv does as well.  Note
   that wchar.h may also define EILSEQ, so this needs to be after we
   include wchar.h, which happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif

/* Open a phony conversion from charset FROM to charset TO.

   Conversions are allowed from UTF-32BE, wchar_t, and the host
   charset, and to wchar_t and the host charset.  Returns 1 when the
   source is UTF-32BE and 0 for any other supported pair; this value
   is the flag later passed to iconv.  For an unsupported pair,
   returns -1 with errno set to EINVAL, so that callers reporting the
   failure through errno see a meaningful value.  */

iconv_t
iconv_open (const char *to, const char *from)
{
  int from_utf32be = strcmp (from, "UTF-32BE") == 0;

  if (!from_utf32be
      && strcmp (from, "wchar_t") != 0
      && strcmp (from, GDB_DEFAULT_HOST_CHARSET) != 0)
    {
      errno = EINVAL;
      return -1;
    }

  if (strcmp (to, "wchar_t") != 0
      && strcmp (to, GDB_DEFAULT_HOST_CHARSET) != 0)
    {
      errno = EINVAL;
      return -1;
    }

  return from_utf32be;
}

/* Close a phony descriptor.  There is nothing to release; always
   returns 0.  */

int
iconv_close (iconv_t arg)
{
  return 0;
}

/* Phony conversion routine.

   UTF_FLAG is the value returned by iconv_open.  *INBUF and
   *INBYTESLEFT describe the remaining input, *OUTBUF and
   *OUTBYTESLEFT the remaining output space; all four are advanced
   past whatever was converted.

   When UTF_FLAG is non-zero the input is read as big-endian 4-byte
   code points and each one below 256 is emitted as a single byte.
   Otherwise input bytes are copied to the output unchanged.

   Returns 0 on success.  On failure returns (size_t) -1 and sets
   errno to EILSEQ (code point of 256 or more), EINVAL (trailing
   partial UTF-32 unit) or E2BIG (output buffer exhausted).  */

size_t
iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  /* A null input is a request to reset the conversion state; this
     converter keeps no state, so there is nothing to do.  */
  if (inbuf == NULL || *inbuf == NULL)
    return 0;

  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }

          /* Never store past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return (size_t) -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* One whole 4-byte unit has been consumed.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Only a trailing fragment of a unit is an error; consuming
         the whole input is success.  */
      if (*inbytesleft > 0)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  if (*inbytesleft)
    {
      errno = E2BIG;
      return (size_t) -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 190
6c7a06a3 191#endif
234b45d4
KB
192
193
194\f
195/* The global lists of character sets and translations. */
196
197
e33d66ec
EZ
198#ifndef GDB_DEFAULT_TARGET_CHARSET
199#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
200#endif
201
6c7a06a3 202#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 203#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
204#endif
205
206static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
207static const char *host_charset_name = "auto";
920d2a44
AC
208static void
209show_host_charset_name (struct ui_file *file, int from_tty,
210 struct cmd_list_element *c,
211 const char *value)
212{
6c7a06a3
TT
213 if (!strcmp (value, "auto"))
214 fprintf_filtered (file,
215 _("The host character set is \"auto; currently %s\".\n"),
216 auto_host_charset_name);
217 else
218 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
219}
220
/* The current value of the target character set setting.  */
static const char *target_charset_name = "auto";

/* Print the target character set setting VALUE to FILE.  When VALUE
   is "auto", the character set chosen by the current architecture is
   printed as well.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
			  struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
			value);
      return;
    }

  fprintf_filtered (file,
		    _("The target character set is \"auto; "
		      "currently %s\".\n"),
		    gdbarch_auto_charset (get_current_arch ()));
}
235
/* The current value of the target wide character set setting.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide character set setting VALUE to FILE.  When
   VALUE is "auto", the wide character set chosen by the current
   architecture is printed as well.  */

static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
			       struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
			_("The target wide character set is \"%s\".\n"),
			value);
      return;
    }

  fprintf_filtered (file,
		    _("The target wide character set is \"auto; "
		      "currently %s\".\n"),
		    gdbarch_auto_wide_charset (get_current_arch ()));
}
e33d66ec 250
6c7a06a3 251static const char *default_charset_names[] =
e33d66ec 252{
6c7a06a3 253 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
254 0
255};
234b45d4 256
6c7a06a3 257static const char **charset_enum;
234b45d4 258
6c7a06a3
TT
259\f
260/* If the target wide character set has big- or little-endian
261 variants, these are the corresponding names. */
262static const char *target_wide_charset_be_name;
263static const char *target_wide_charset_le_name;
234b45d4 264
f870a310
TT
265/* The architecture for which the BE- and LE-names are valid. */
266static struct gdbarch *be_le_arch;
267
268/* A helper function which sets the target wide big- and little-endian
269 character set names, if possible. */
234b45d4 270
6c7a06a3 271static void
f870a310 272set_be_le_names (struct gdbarch *gdbarch)
234b45d4 273{
6c7a06a3 274 int i, len;
f870a310
TT
275 const char *target_wide;
276
277 if (be_le_arch == gdbarch)
278 return;
279 be_le_arch = gdbarch;
234b45d4 280
6c7a06a3
TT
281 target_wide_charset_le_name = NULL;
282 target_wide_charset_be_name = NULL;
234b45d4 283
f870a310
TT
284 target_wide = target_wide_charset_name;
285 if (!strcmp (target_wide, "auto"))
286 target_wide = gdbarch_auto_wide_charset (gdbarch);
287
288 len = strlen (target_wide);
6c7a06a3
TT
289 for (i = 0; charset_enum[i]; ++i)
290 {
f870a310 291 if (strncmp (target_wide, charset_enum[i], len))
6c7a06a3
TT
292 continue;
293 if ((charset_enum[i][len] == 'B'
294 || charset_enum[i][len] == 'L')
295 && charset_enum[i][len + 1] == 'E'
296 && charset_enum[i][len + 2] == '\0')
297 {
298 if (charset_enum[i][len] == 'B')
299 target_wide_charset_be_name = charset_enum[i];
300 else
301 target_wide_charset_le_name = charset_enum[i];
302 }
303 }
234b45d4
KB
304}
305
6c7a06a3
TT
306/* 'Set charset', 'set host-charset', 'set target-charset', 'set
307 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
308
309static void
f870a310 310validate (struct gdbarch *gdbarch)
234b45d4 311{
6c7a06a3
TT
312 iconv_t desc;
313 const char *host_cset = host_charset ();
f870a310
TT
314 const char *target_cset = target_charset (gdbarch);
315 const char *target_wide_cset = target_wide_charset_name;
c5504eaf 316
f870a310
TT
317 if (!strcmp (target_wide_cset, "auto"))
318 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
234b45d4 319
f870a310 320 desc = iconv_open (target_wide_cset, host_cset);
6c7a06a3
TT
321 if (desc == (iconv_t) -1)
322 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 323 target_wide_cset, host_cset);
6c7a06a3 324 iconv_close (desc);
234b45d4 325
f870a310 326 desc = iconv_open (target_cset, host_cset);
6c7a06a3
TT
327 if (desc == (iconv_t) -1)
328 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 329 target_cset, host_cset);
6c7a06a3 330 iconv_close (desc);
234b45d4 331
f870a310
TT
332 /* Clear the cache. */
333 be_le_arch = NULL;
234b45d4
KB
334}
335
6c7a06a3
TT
336/* This is the sfunc for the 'set charset' command. */
337static void
338set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 339{
6c7a06a3
TT
340 /* CAREFUL: set the target charset here as well. */
341 target_charset_name = host_charset_name;
f870a310 342 validate (get_current_arch ());
234b45d4
KB
343}
344
6c7a06a3
TT
/* The sfunc for the 'set host-charset' command.  This wrapper exists
   only because the callback must have this particular signature; it
   validates the settings for the current architecture.  */

static void
set_host_charset_sfunc (char *charset, int from_tty,
			struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
353
6c7a06a3
TT
/* The sfunc for the 'set target-charset' command; validates the
   settings for the current architecture.  */

static void
set_target_charset_sfunc (char *charset, int from_tty,
			  struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
361
6c7a06a3
TT
/* The sfunc for the 'set target-wide-charset' command; validates the
   settings for the current architecture.  */

static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
			       struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
369
6c7a06a3
TT
370/* sfunc for the 'show charset' command. */
371static void
372show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
373 const char *name)
234b45d4 374{
6c7a06a3
TT
375 show_host_charset_name (file, from_tty, c, host_charset_name);
376 show_target_charset_name (file, from_tty, c, target_charset_name);
377 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
234b45d4
KB
378}
379
234b45d4 380\f
6c7a06a3 381/* Accessor functions. */
234b45d4 382
6c7a06a3
TT
383const char *
384host_charset (void)
234b45d4 385{
6c7a06a3
TT
386 if (!strcmp (host_charset_name, "auto"))
387 return auto_host_charset_name;
388 return host_charset_name;
234b45d4
KB
389}
390
6c7a06a3 391const char *
f870a310 392target_charset (struct gdbarch *gdbarch)
234b45d4 393{
f870a310
TT
394 if (!strcmp (target_charset_name, "auto"))
395 return gdbarch_auto_charset (gdbarch);
6c7a06a3 396 return target_charset_name;
234b45d4 397}
234b45d4 398
6c7a06a3 399const char *
f870a310 400target_wide_charset (struct gdbarch *gdbarch)
234b45d4 401{
f870a310
TT
402 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
403
404 set_be_le_names (gdbarch);
e17a4113 405 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 406 {
6c7a06a3
TT
407 if (target_wide_charset_be_name)
408 return target_wide_charset_be_name;
234b45d4 409 }
6c7a06a3 410 else
234b45d4 411 {
6c7a06a3
TT
412 if (target_wide_charset_le_name)
413 return target_wide_charset_le_name;
234b45d4
KB
414 }
415
f870a310
TT
416 if (!strcmp (target_wide_charset_name, "auto"))
417 return gdbarch_auto_wide_charset (gdbarch);
418
6c7a06a3 419 return target_wide_charset_name;
234b45d4
KB
420}
421
234b45d4 422\f
6c7a06a3
TT
423/* Host character set management. For the time being, we assume that
424 the host character set is some superset of ASCII. */
234b45d4 425
6c7a06a3
TT
/* Return the control character that corresponds to the host
   character C: '?' maps to DEL (0177); for any other character the
   bits 0140 are cleared.  */

char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
433
6c7a06a3
TT
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything that is not a hex digit
   trips the assertion below.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions take an int that must hold an unsigned
     char value (or EOF); a negative plain char would be undefined
     behavior, hence the cast.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
447
234b45d4 448\f
6c7a06a3 449/* Public character management functions. */
234b45d4 450
6c7a06a3 451/* A cleanup function which is run to close an iconv descriptor. */
234b45d4 452
6c7a06a3
TT
453static void
454cleanup_iconv (void *p)
234b45d4 455{
6c7a06a3
TT
456 iconv_t *descp = p;
457 iconv_close (*descp);
234b45d4
KB
458}
459
6c7a06a3
TT
460void
461convert_between_encodings (const char *from, const char *to,
462 const gdb_byte *bytes, unsigned int num_bytes,
463 int width, struct obstack *output,
464 enum transliterations translit)
465{
466 iconv_t desc;
467 struct cleanup *cleanups;
468 size_t inleft;
469 char *inp;
470 unsigned int space_request;
471
472 /* Often, the host and target charsets will be the same. */
473 if (!strcmp (from, to))
474 {
475 obstack_grow (output, bytes, num_bytes);
476 return;
477 }
234b45d4 478
6c7a06a3
TT
479 desc = iconv_open (to, from);
480 if (desc == (iconv_t) -1)
481 perror_with_name ("Converting character sets");
482 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 483
6c7a06a3
TT
484 inleft = num_bytes;
485 inp = (char *) bytes;
234b45d4 486
6c7a06a3 487 space_request = num_bytes;
234b45d4 488
6c7a06a3 489 while (inleft > 0)
234b45d4 490 {
6c7a06a3
TT
491 char *outp;
492 size_t outleft, r;
493 int old_size;
494
495 old_size = obstack_object_size (output);
496 obstack_blank (output, space_request);
497
498 outp = obstack_base (output) + old_size;
499 outleft = space_request;
500
0dd7fb99 501 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
502
503 /* Now make sure that the object on the obstack only includes
504 bytes we have converted. */
505 obstack_blank (output, - (int) outleft);
506
507 if (r == (size_t) -1)
508 {
509 switch (errno)
510 {
511 case EILSEQ:
512 {
513 int i;
514
515 /* Invalid input sequence. */
516 if (translit == translit_none)
517 error (_("Could not convert character to `%s' character set"),
518 to);
519
520 /* We emit escape sequence for the bytes, skip them,
521 and try again. */
522 for (i = 0; i < width; ++i)
523 {
524 char octal[5];
525
526 sprintf (octal, "\\%.3o", *inp & 0xff);
527 obstack_grow_str (output, octal);
528
529 ++inp;
530 --inleft;
531 }
532 }
533 break;
534
535 case E2BIG:
536 /* We ran out of space in the output buffer. Make it
537 bigger next time around. */
538 space_request *= 2;
539 break;
540
541 case EINVAL:
542 /* Incomplete input sequence. FIXME: ought to report this
543 to the caller somehow. */
544 inleft = 0;
545 break;
546
547 default:
548 perror_with_name ("Internal error while converting character sets");
549 }
550 }
234b45d4 551 }
234b45d4 552
6c7a06a3 553 do_cleanups (cleanups);
234b45d4
KB
554}
555
e33d66ec 556\f
e33d66ec 557
6c7a06a3
TT
558/* An iterator that returns host wchar_t's from a target string. */
559struct wchar_iterator
e33d66ec 560{
6c7a06a3
TT
561 /* The underlying iconv descriptor. */
562 iconv_t desc;
e33d66ec 563
6c7a06a3
TT
564 /* The input string. This is updated as convert characters. */
565 char *input;
566 /* The number of bytes remaining in the input. */
567 size_t bytes;
e33d66ec 568
6c7a06a3
TT
569 /* The width of an input character. */
570 size_t width;
e33d66ec 571
6c7a06a3
TT
572 /* The output buffer and its size. */
573 gdb_wchar_t *out;
574 size_t out_size;
575};
234b45d4 576
6c7a06a3
TT
577/* Create a new iterator. */
578struct wchar_iterator *
579make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
580 size_t width)
234b45d4 581{
6c7a06a3
TT
582 struct wchar_iterator *result;
583 iconv_t desc;
234b45d4 584
732f6a93 585 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
586 if (desc == (iconv_t) -1)
587 perror_with_name ("Converting character sets");
234b45d4 588
6c7a06a3
TT
589 result = XNEW (struct wchar_iterator);
590 result->desc = desc;
591 result->input = (char *) input;
592 result->bytes = bytes;
593 result->width = width;
234b45d4 594
6c7a06a3
TT
595 result->out = XNEW (gdb_wchar_t);
596 result->out_size = 1;
234b45d4 597
6c7a06a3 598 return result;
e33d66ec 599}
234b45d4 600
e33d66ec 601static void
6c7a06a3 602do_cleanup_iterator (void *p)
e33d66ec 603{
6c7a06a3 604 struct wchar_iterator *iter = p;
234b45d4 605
6c7a06a3
TT
606 iconv_close (iter->desc);
607 xfree (iter->out);
608 xfree (iter);
234b45d4
KB
609}
610
6c7a06a3
TT
611struct cleanup *
612make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 613{
6c7a06a3 614 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 615}
234b45d4 616
6c7a06a3
TT
617int
618wchar_iterate (struct wchar_iterator *iter,
619 enum wchar_iterate_result *out_result,
620 gdb_wchar_t **out_chars,
621 const gdb_byte **ptr,
622 size_t *len)
623{
624 size_t out_request;
625
626 /* Try to convert some characters. At first we try to convert just
627 a single character. The reason for this is that iconv does not
628 necessarily update its outgoing arguments when it encounters an
629 invalid input sequence -- but we want to reliably report this to
630 our caller so it can emit an escape sequence. */
631 out_request = 1;
632 while (iter->bytes > 0)
e33d66ec 633 {
6c7a06a3
TT
634 char *outptr = (char *) &iter->out[0];
635 char *orig_inptr = iter->input;
636 size_t orig_in = iter->bytes;
637 size_t out_avail = out_request * sizeof (gdb_wchar_t);
638 size_t num;
0dd7fb99
TT
639 size_t r = iconv (iter->desc,
640 (ICONV_CONST char **) &iter->input, &iter->bytes,
6c7a06a3 641 &outptr, &out_avail);
c5504eaf 642
6c7a06a3
TT
643 if (r == (size_t) -1)
644 {
645 switch (errno)
646 {
647 case EILSEQ:
648 /* Invalid input sequence. Skip it, and let the caller
649 know about it. */
650 *out_result = wchar_iterate_invalid;
651 *ptr = iter->input;
652 *len = iter->width;
653 iter->input += iter->width;
654 iter->bytes -= iter->width;
655 return 0;
656
657 case E2BIG:
658 /* We ran out of space. We still might have converted a
659 character; if so, return it. Otherwise, grow the
660 buffer and try again. */
661 if (out_avail < out_request * sizeof (gdb_wchar_t))
662 break;
663
664 ++out_request;
665 if (out_request > iter->out_size)
666 {
667 iter->out_size = out_request;
668 iter->out = xrealloc (iter->out,
669 out_request * sizeof (gdb_wchar_t));
670 }
671 continue;
672
673 case EINVAL:
674 /* Incomplete input sequence. Let the caller know, and
675 arrange for future calls to see EOF. */
676 *out_result = wchar_iterate_incomplete;
677 *ptr = iter->input;
678 *len = iter->bytes;
679 iter->bytes = 0;
680 return 0;
681
682 default:
683 perror_with_name ("Internal error while converting character sets");
684 }
685 }
686
687 /* We converted something. */
688 num = out_request - out_avail / sizeof (gdb_wchar_t);
689 *out_result = wchar_iterate_ok;
690 *out_chars = iter->out;
691 *ptr = orig_inptr;
692 *len = orig_in - iter->bytes;
693 return num;
e33d66ec 694 }
6c7a06a3
TT
695
696 /* Really done. */
697 *out_result = wchar_iterate_eof;
698 return -1;
234b45d4
KB
699}
700
e33d66ec 701\f
6c7a06a3 702/* The charset.c module initialization function. */
234b45d4 703
6c7a06a3 704extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 705
6c7a06a3 706DEF_VEC_P (char_ptr);
234b45d4 707
6c7a06a3 708static VEC (char_ptr) *charsets;
234b45d4 709
6c7a06a3 710#ifdef PHONY_ICONV
234b45d4 711
6c7a06a3
TT
712static void
713find_charset_names (void)
234b45d4 714{
6c7a06a3
TT
715 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
716 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
717}
718
6c7a06a3 719#else /* PHONY_ICONV */
fc3b640d
TT
720
721/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
722 provides different symbols in the static and dynamic libraries.
723 So, configure may see libiconvlist but not iconvlist. But, calling
724 iconvlist is the right thing to do and will work. Hence we do a
725 check here but unconditionally call iconvlist below. */
726#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 727
6c7a06a3
TT
728/* A helper function that adds some character sets to the vector of
729 all character sets. This is a callback function for iconvlist. */
730
731static int
732add_one (unsigned int count, const char *const *names, void *data)
234b45d4 733{
6c7a06a3 734 unsigned int i;
234b45d4 735
6c7a06a3
TT
736 for (i = 0; i < count; ++i)
737 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 738
6c7a06a3 739 return 0;
234b45d4
KB
740}
741
6c7a06a3
TT
742static void
743find_charset_names (void)
234b45d4 744{
6c7a06a3
TT
745 iconvlist (add_one, NULL);
746 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
747}
748
6c7a06a3 749#else
234b45d4 750
40b5c9fb
DE
/* Return non-zero if LINE, a line of output from the iconv program,
   belongs to the human readable introduction and must be skipped.
   Older iconv programs (e.g. 2.2.2) print that introduction even
   when stdout is not a tty; newer versions leave it out.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line that contains any of
     them, at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = intro_fragments; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
779
6c7a06a3
TT
780static void
781find_charset_names (void)
234b45d4 782{
732f6a93
TT
783 struct pex_obj *child;
784 char *args[3];
785 int err, status;
786 int fail = 1;
40b5c9fb
DE
787 struct gdb_environ *iconv_env;
788
789 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
790 a tty. We need to recognize it and ignore it. This text is subject
791 to translation, so force LANGUAGE=C. */
792 iconv_env = make_environ ();
793 init_environ (iconv_env);
794 set_in_environ (iconv_env, "LANGUAGE", "C");
795 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93 796
40618926 797 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
732f6a93
TT
798
799 args[0] = "iconv";
800 args[1] = "-l";
801 args[2] = NULL;
802 /* Note that we simply ignore errors here. */
40b5c9fb
DE
803 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
804 "iconv", args, environ_vector (iconv_env),
805 NULL, NULL, &err))
732f6a93
TT
806 {
807 FILE *in = pex_read_output (child, 0);
808
809 /* POSIX says that iconv -l uses an unspecified format. We
810 parse the glibc and libiconv formats; feel free to add others
811 as needed. */
40b5c9fb 812
732f6a93
TT
813 while (!feof (in))
814 {
815 /* The size of buf is chosen arbitrarily. */
816 char buf[1024];
817 char *start, *r;
8ea13695 818 int len;
732f6a93
TT
819
820 r = fgets (buf, sizeof (buf), in);
821 if (!r)
822 break;
823 len = strlen (r);
824 if (len <= 3)
825 continue;
40b5c9fb
DE
826 if (ignore_line_p (r))
827 continue;
828
732f6a93
TT
829 /* Strip off the newline. */
830 --len;
831 /* Strip off one or two '/'s. glibc will print lines like
832 "8859_7//", but also "10646-1:1993/UCS4/". */
833 if (buf[len - 1] == '/')
834 --len;
835 if (buf[len - 1] == '/')
836 --len;
837 buf[len] = '\0';
838
839 /* libiconv will print multiple entries per line, separated
40b5c9fb
DE
840 by spaces. Older iconvs will print multiple entries per line,
841 indented by two spaces, and separated by ", "
842 (i.e. the human readable form). */
732f6a93
TT
843 start = buf;
844 while (1)
845 {
846 int keep_going;
847 char *p;
848
40b5c9fb
DE
849 /* Skip leading blanks. */
850 for (p = start; *p && *p == ' '; ++p)
851 ;
852 start = p;
853 /* Find the next space, comma, or end-of-line. */
854 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
855 ;
856 /* Ignore an empty result. */
857 if (p == start)
858 break;
859 keep_going = *p;
860 *p = '\0';
861 VEC_safe_push (char_ptr, charsets, xstrdup (start));
862 if (!keep_going)
863 break;
864 /* Skip any extra spaces. */
865 for (start = p + 1; *start && *start == ' '; ++start)
866 ;
867 }
868 }
234b45d4 869
732f6a93
TT
870 if (pex_get_status (child, 1, &status)
871 && WIFEXITED (status) && !WEXITSTATUS (status))
872 fail = 0;
234b45d4 873
6c7a06a3 874 }
234b45d4 875
732f6a93 876 pex_free (child);
40b5c9fb 877 free_environ (iconv_env);
234b45d4 878
732f6a93
TT
879 if (fail)
880 {
881 /* Some error occurred, so drop the vector. */
882 int ix;
883 char *elt;
884 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
885 xfree (elt);
886 VEC_truncate (char_ptr, charsets, 0);
887 }
888 else
889 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 890}
234b45d4 891
fc3b640d 892#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 893#endif /* PHONY_ICONV */
234b45d4 894
f870a310
TT
895/* The "auto" target charset used by default_auto_charset. */
896static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
897
898const char *
899default_auto_charset (void)
900{
901 return auto_target_charset_name;
902}
903
904const char *
905default_auto_wide_charset (void)
906{
907 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
908}
909
234b45d4
KB
910void
911_initialize_charset (void)
912{
f870a310 913 /* The first element is always "auto". */
732f6a93 914 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
915 find_charset_names ();
916
917 if (VEC_length (char_ptr, charsets) > 1)
918 charset_enum = (const char **) VEC_address (char_ptr, charsets);
919 else
920 charset_enum = default_charset_names;
921
922#ifndef PHONY_ICONV
923#ifdef HAVE_LANGINFO_CODESET
f870a310
TT
924 /* The result of nl_langinfo may be overwritten later. This may
925 leak a little memory, if the user later changes the host charset,
926 but that doesn't matter much. */
927 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
58720494 928 /* Solaris will return `646' here -- but the Solaris iconv then
06be6983
TG
929 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
930 which GNU libiconv doesn't like (infinite loop). */
931 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 932 auto_host_charset_name = "ASCII";
f870a310
TT
933 auto_target_charset_name = auto_host_charset_name;
934#elif defined (USE_WIN32API)
935 {
43484f03 936 static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
f870a310
TT
937
938 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
939 "CP%d", GetACP());
940 auto_host_charset_name = w32_host_default_charset;
941 auto_target_charset_name = auto_host_charset_name;
942 }
6c7a06a3
TT
943#endif
944#endif
e33d66ec 945
7ab04401 946 add_setshow_enum_cmd ("charset", class_support,
f870a310 947 charset_enum, &host_charset_name, _("\
7ab04401
AC
948Set the host and target character sets."), _("\
949Show the host and target character sets."), _("\
3d263c1d
BI
950The `host character set' is the one used by the system GDB is running on.\n\
951The `target character set' is the one used by the program being debugged.\n\
952You may only use supersets of ASCII for your host character set; GDB does\n\
953not support any others.\n\
954To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
955 /* Note that the sfunc below needs to set
956 target_charset_name, because the 'set
957 charset' command sets two variables. */
958 set_charset_sfunc,
959 show_charset,
960 &setlist, &showlist);
961
962 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 963 charset_enum, &host_charset_name, _("\
7ab04401
AC
964Set the host character set."), _("\
965Show the host character set."), _("\
3d263c1d
BI
966The `host character set' is the one used by the system GDB is running on.\n\
967You may only use supersets of ASCII for your host character set; GDB does\n\
968not support any others.\n\
969To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 970 set_host_charset_sfunc,
920d2a44 971 show_host_charset_name,
7ab04401
AC
972 &setlist, &showlist);
973
974 add_setshow_enum_cmd ("target-charset", class_support,
f870a310 975 charset_enum, &target_charset_name, _("\
7ab04401
AC
976Set the target character set."), _("\
977Show the target character set."), _("\
3d263c1d
BI
978The `target character set' is the one used by the program being debugged.\n\
979GDB translates characters and strings between the host and target\n\
980character sets as needed.\n\
981To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 982 set_target_charset_sfunc,
920d2a44 983 show_target_charset_name,
7ab04401 984 &setlist, &showlist);
6c7a06a3
TT
985
986 add_setshow_enum_cmd ("target-wide-charset", class_support,
f870a310 987 charset_enum, &target_wide_charset_name,
6c7a06a3
TT
988 _("\
989Set the target wide character set."), _("\
990Show the target wide character set."), _("\
991The `target wide character set' is the one used by the program being debugged.\n\
992In particular it is the encoding used by `wchar_t'.\n\
993GDB translates characters and strings between the host and target\n\
994character sets as needed.\n\
995To see a list of the character sets GDB supports, type\n\
996`set target-wide-charset'<TAB>"),
997 set_target_wide_charset_sfunc,
998 show_target_wide_charset_name,
999 &setlist, &showlist);
234b45d4 1000}
This page took 0.667091 seconds and 4 git commands to generate.