gdb/testsuite/
[deliverable/binutils-gdb.git] / gdb / charset.c
1 /* Character set conversion support for GDB.
2
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 #include "defs.h"
22 #include "charset.h"
23 #include "gdbcmd.h"
24 #include "gdb_assert.h"
25 #include "gdb_obstack.h"
26 #include "gdb_wait.h"
27 #include "charset-list.h"
28 #include "vec.h"
29 #include "environ.h"
30 #include "arch-utils.h"
31
32 #include <stddef.h>
33 #include "gdb_string.h"
34 #include <ctype.h>
35
36 #ifdef USE_WIN32API
37 #include <windows.h>
38 #endif
39 \f
40 /* How GDB's character set support works
41
42 GDB has three global settings:
43
44 - The `current host character set' is the character set GDB should
45 use in talking to the user, and which (hopefully) the user's
46 terminal knows how to display properly. Most users should not
47 change this.
48
49 - The `current target character set' is the character set the
50 program being debugged uses.
51
52 - The `current target wide character set' is the wide character set
53 the program being debugged uses, that is, the encoding used for
54 wchar_t.
55
56 There are commands to set each of these, and mechanisms for
57 choosing reasonable default values. GDB has a global list of
58 character sets that it can use as its host or target character
59 sets.
60
61 The header file `charset.h' declares various functions that
62 different pieces of GDB need to perform tasks like:
63
64 - printing target strings and characters to the user's terminal
65 (mostly target->host conversions),
66
67 - building target-appropriate representations of strings and
68 characters the user enters in expressions (mostly host->target
69 conversions),
70
71 and so on.
72
73 To avoid excessive code duplication and maintenance efforts,
74 GDB simply requires a capable iconv function. Users on platforms
75 without a suitable iconv can use the GNU iconv library. */
76
77 \f
78 #ifdef PHONY_ICONV
79
/* Provide a phony iconv that does as little as possible.  Also,
   arrange for there to be a single available character set.  */

#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* The phony conversion descriptor is a plain int: 1 means "input is
   UTF-32BE", 0 means "copy bytes verbatim", -1 means failure.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#undef iconv
#undef iconv_close

#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems don't have EILSEQ, so we define it here, but not as
   EINVAL, because callers of `iconv' want to distinguish EINVAL and
   EILSEQ.  This is what iconv.h from libiconv does as well.  Note
   that wchar.h may also define EILSEQ, so this needs to be after we
   include wchar.h, which happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif

/* Open a phony conversion from charset FROM to charset TO.

   Accepted sources are "UTF-32BE", "wchar_t" and the host charset;
   accepted destinations are "wchar_t" and the host charset.  Returns
   1 when converting from UTF-32BE, 0 for any other accepted pair, and
   -1 with errno set to EINVAL when the pair is not supported.  */

iconv_t
iconv_open (const char *to, const char *from)
{
  int from_utf32 = strcmp (from, "UTF-32BE") == 0;
  int from_ok = (from_utf32
                 || strcmp (from, "wchar_t") == 0
                 || strcmp (from, GDB_DEFAULT_HOST_CHARSET) == 0);
  int to_ok = (strcmp (to, "wchar_t") == 0
               || strcmp (to, GDB_DEFAULT_HOST_CHARSET) == 0);

  if (!from_ok || !to_ok)
    {
      /* Callers report failures with perror_with_name, so leave a
         meaningful errno behind instead of whatever was there.  */
      errno = EINVAL;
      return -1;
    }

  /* The descriptor doubles as the "input is UTF-32BE" flag that the
     phony iconv looks at.  */
  return from_utf32;
}

/* Close a phony conversion descriptor.  There is nothing to release,
   so this always returns 0.  */

int
iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}

/* Phony conversion routine.

   When UTF_FLAG is non-zero the input is read as big-endian 4-byte
   units and each unit below 256 is written as one output byte.
   Otherwise input bytes are copied to the output unchanged.  The
   in/out pointers and counts are advanced past whatever was
   converted.

   Returns 0 when all input was consumed.  Otherwise returns
   (size_t) -1 and sets errno to:
     EILSEQ - a UTF-32BE unit had a value of 256 or more;
     EINVAL - the UTF-32BE input ends with fewer than 4 bytes;
     E2BIG  - the output buffer is full.  */

size_t
iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }
          /* Never write past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return (size_t) -1;
            }
          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* Step over the whole 4-byte unit, not just one byte.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }
      /* Only a genuinely truncated trailing unit is an error; having
         consumed everything is success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  /* Anything left over here did not fit in the output buffer.  */
  if (*inbytesleft)
    {
      errno = E2BIG;
      return (size_t) -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
190
191 #endif
192
193
194 \f
195 /* The global lists of character sets and translations. */
196
197
/* Fallback for the target charset, used when nothing earlier has
   defined one.  */
#if !defined (GDB_DEFAULT_TARGET_CHARSET)
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

/* Fallback for the target wide charset, used when nothing earlier has
   defined one.  */
#if !defined (GDB_DEFAULT_TARGET_WIDE_CHARSET)
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
205
206 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
207 static const char *host_charset_name = "auto";
208 static void
209 show_host_charset_name (struct ui_file *file, int from_tty,
210 struct cmd_list_element *c,
211 const char *value)
212 {
213 if (!strcmp (value, "auto"))
214 fprintf_filtered (file,
215 _("The host character set is \"auto; currently %s\".\n"),
216 auto_host_charset_name);
217 else
218 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
219 }
220
/* The target charset setting; "auto" defers to the architecture.  */
static const char *target_charset_name = "auto";

/* Print the target character set setting VALUE to FILE.  When VALUE
   is "auto", the charset chosen by the current architecture is
   printed as well.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
235
/* The target wide charset setting; "auto" defers to the
   architecture.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide character set setting VALUE to FILE.  When
   VALUE is "auto", the wide charset chosen by the current
   architecture is printed as well.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
                        _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
250
251 static const char *default_charset_names[] =
252 {
253 DEFAULT_CHARSET_NAMES
254 0
255 };
256
257 static const char **charset_enum;
258
259 \f
260 /* If the target wide character set has big- or little-endian
261 variants, these are the corresponding names. */
262 static const char *target_wide_charset_be_name;
263 static const char *target_wide_charset_le_name;
264
265 /* The architecture for which the BE- and LE-names are valid. */
266 static struct gdbarch *be_le_arch;
267
268 /* A helper function which sets the target wide big- and little-endian
269 character set names, if possible. */
270
271 static void
272 set_be_le_names (struct gdbarch *gdbarch)
273 {
274 int i, len;
275 const char *target_wide;
276
277 if (be_le_arch == gdbarch)
278 return;
279 be_le_arch = gdbarch;
280
281 target_wide_charset_le_name = NULL;
282 target_wide_charset_be_name = NULL;
283
284 target_wide = target_wide_charset_name;
285 if (!strcmp (target_wide, "auto"))
286 target_wide = gdbarch_auto_wide_charset (gdbarch);
287
288 len = strlen (target_wide);
289 for (i = 0; charset_enum[i]; ++i)
290 {
291 if (strncmp (target_wide, charset_enum[i], len))
292 continue;
293 if ((charset_enum[i][len] == 'B'
294 || charset_enum[i][len] == 'L')
295 && charset_enum[i][len + 1] == 'E'
296 && charset_enum[i][len + 2] == '\0')
297 {
298 if (charset_enum[i][len] == 'B')
299 target_wide_charset_be_name = charset_enum[i];
300 else
301 target_wide_charset_le_name = charset_enum[i];
302 }
303 }
304 }
305
306 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
307 target-wide-charset', 'set charset' sfunc's. */
308
309 static void
310 validate (struct gdbarch *gdbarch)
311 {
312 iconv_t desc;
313 const char *host_cset = host_charset ();
314 const char *target_cset = target_charset (gdbarch);
315 const char *target_wide_cset = target_wide_charset_name;
316
317 if (!strcmp (target_wide_cset, "auto"))
318 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
319
320 desc = iconv_open (target_wide_cset, host_cset);
321 if (desc == (iconv_t) -1)
322 error ("Cannot convert between character sets `%s' and `%s'",
323 target_wide_cset, host_cset);
324 iconv_close (desc);
325
326 desc = iconv_open (target_cset, host_cset);
327 if (desc == (iconv_t) -1)
328 error ("Cannot convert between character sets `%s' and `%s'",
329 target_cset, host_cset);
330 iconv_close (desc);
331
332 /* Clear the cache. */
333 be_le_arch = NULL;
334 }
335
336 /* This is the sfunc for the 'set charset' command. */
337 static void
338 set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
339 {
340 /* CAREFUL: set the target charset here as well. */
341 target_charset_name = host_charset_name;
342 validate (get_current_arch ());
343 }
344
/* sfunc for 'set host-charset'.  This only exists to give validate
   the signature the command machinery expects.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
353
/* sfunc for 'set target-charset': validate the new setting against
   the current architecture.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
361
/* sfunc for 'set target-wide-charset': validate the new setting
   against the current architecture.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
369
370 /* sfunc for the 'show charset' command. */
371 static void
372 show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
373 const char *name)
374 {
375 show_host_charset_name (file, from_tty, c, host_charset_name);
376 show_target_charset_name (file, from_tty, c, target_charset_name);
377 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
378 }
379
380 \f
381 /* Accessor functions. */
382
383 const char *
384 host_charset (void)
385 {
386 if (!strcmp (host_charset_name, "auto"))
387 return auto_host_charset_name;
388 return host_charset_name;
389 }
390
391 const char *
392 target_charset (struct gdbarch *gdbarch)
393 {
394 if (!strcmp (target_charset_name, "auto"))
395 return gdbarch_auto_charset (gdbarch);
396 return target_charset_name;
397 }
398
399 const char *
400 target_wide_charset (struct gdbarch *gdbarch)
401 {
402 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
403
404 set_be_le_names (gdbarch);
405 if (byte_order == BFD_ENDIAN_BIG)
406 {
407 if (target_wide_charset_be_name)
408 return target_wide_charset_be_name;
409 }
410 else
411 {
412 if (target_wide_charset_le_name)
413 return target_wide_charset_le_name;
414 }
415
416 if (!strcmp (target_wide_charset_name, "auto"))
417 return gdbarch_auto_wide_charset (gdbarch);
418
419 return target_wide_charset_name;
420 }
421
422 \f
/* Host character set management.  For the time being, we assume that
   the host character set is some superset of ASCII.  */

/* Return the control character that corresponds to host character C.
   '?' maps to 0177; every other character is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
433
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything else trips the assertion.

   The digit test is an explicit range comparison rather than isdigit,
   because passing a negative plain char to a <ctype.h> function is
   undefined.  */

int
host_hex_value (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
447
448 \f
449 /* Public character management functions. */
450
451 /* A cleanup function which is run to close an iconv descriptor. */
452
453 static void
454 cleanup_iconv (void *p)
455 {
456 iconv_t *descp = p;
457 iconv_close (*descp);
458 }
459
460 void
461 convert_between_encodings (const char *from, const char *to,
462 const gdb_byte *bytes, unsigned int num_bytes,
463 int width, struct obstack *output,
464 enum transliterations translit)
465 {
466 iconv_t desc;
467 struct cleanup *cleanups;
468 size_t inleft;
469 char *inp;
470 unsigned int space_request;
471
472 /* Often, the host and target charsets will be the same. */
473 if (!strcmp (from, to))
474 {
475 obstack_grow (output, bytes, num_bytes);
476 return;
477 }
478
479 desc = iconv_open (to, from);
480 if (desc == (iconv_t) -1)
481 perror_with_name ("Converting character sets");
482 cleanups = make_cleanup (cleanup_iconv, &desc);
483
484 inleft = num_bytes;
485 inp = (char *) bytes;
486
487 space_request = num_bytes;
488
489 while (inleft > 0)
490 {
491 char *outp;
492 size_t outleft, r;
493 int old_size;
494
495 old_size = obstack_object_size (output);
496 obstack_blank (output, space_request);
497
498 outp = obstack_base (output) + old_size;
499 outleft = space_request;
500
501 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
502
503 /* Now make sure that the object on the obstack only includes
504 bytes we have converted. */
505 obstack_blank (output, - (int) outleft);
506
507 if (r == (size_t) -1)
508 {
509 switch (errno)
510 {
511 case EILSEQ:
512 {
513 int i;
514
515 /* Invalid input sequence. */
516 if (translit == translit_none)
517 error (_("Could not convert character to `%s' character set"),
518 to);
519
520 /* We emit escape sequence for the bytes, skip them,
521 and try again. */
522 for (i = 0; i < width; ++i)
523 {
524 char octal[5];
525
526 sprintf (octal, "\\%.3o", *inp & 0xff);
527 obstack_grow_str (output, octal);
528
529 ++inp;
530 --inleft;
531 }
532 }
533 break;
534
535 case E2BIG:
536 /* We ran out of space in the output buffer. Make it
537 bigger next time around. */
538 space_request *= 2;
539 break;
540
541 case EINVAL:
542 /* Incomplete input sequence. FIXME: ought to report this
543 to the caller somehow. */
544 inleft = 0;
545 break;
546
547 default:
548 perror_with_name ("Internal error while converting character sets");
549 }
550 }
551 }
552
553 do_cleanups (cleanups);
554 }
555
556 \f
557
558 /* An iterator that returns host wchar_t's from a target string. */
559 struct wchar_iterator
560 {
561 /* The underlying iconv descriptor. */
562 iconv_t desc;
563
564 /* The input string. This is updated as convert characters. */
565 char *input;
566 /* The number of bytes remaining in the input. */
567 size_t bytes;
568
569 /* The width of an input character. */
570 size_t width;
571
572 /* The output buffer and its size. */
573 gdb_wchar_t *out;
574 size_t out_size;
575 };
576
577 /* Create a new iterator. */
578 struct wchar_iterator *
579 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
580 size_t width)
581 {
582 struct wchar_iterator *result;
583 iconv_t desc;
584
585 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
586 if (desc == (iconv_t) -1)
587 perror_with_name ("Converting character sets");
588
589 result = XNEW (struct wchar_iterator);
590 result->desc = desc;
591 result->input = (char *) input;
592 result->bytes = bytes;
593 result->width = width;
594
595 result->out = XNEW (gdb_wchar_t);
596 result->out_size = 1;
597
598 return result;
599 }
600
601 static void
602 do_cleanup_iterator (void *p)
603 {
604 struct wchar_iterator *iter = p;
605
606 iconv_close (iter->desc);
607 xfree (iter->out);
608 xfree (iter);
609 }
610
611 struct cleanup *
612 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
613 {
614 return make_cleanup (do_cleanup_iterator, iter);
615 }
616
617 int
618 wchar_iterate (struct wchar_iterator *iter,
619 enum wchar_iterate_result *out_result,
620 gdb_wchar_t **out_chars,
621 const gdb_byte **ptr,
622 size_t *len)
623 {
624 size_t out_request;
625
626 /* Try to convert some characters. At first we try to convert just
627 a single character. The reason for this is that iconv does not
628 necessarily update its outgoing arguments when it encounters an
629 invalid input sequence -- but we want to reliably report this to
630 our caller so it can emit an escape sequence. */
631 out_request = 1;
632 while (iter->bytes > 0)
633 {
634 char *outptr = (char *) &iter->out[0];
635 char *orig_inptr = iter->input;
636 size_t orig_in = iter->bytes;
637 size_t out_avail = out_request * sizeof (gdb_wchar_t);
638 size_t num;
639 size_t r = iconv (iter->desc,
640 (ICONV_CONST char **) &iter->input, &iter->bytes,
641 &outptr, &out_avail);
642
643 if (r == (size_t) -1)
644 {
645 switch (errno)
646 {
647 case EILSEQ:
648 /* Invalid input sequence. Skip it, and let the caller
649 know about it. */
650 *out_result = wchar_iterate_invalid;
651 *ptr = iter->input;
652 *len = iter->width;
653 iter->input += iter->width;
654 iter->bytes -= iter->width;
655 return 0;
656
657 case E2BIG:
658 /* We ran out of space. We still might have converted a
659 character; if so, return it. Otherwise, grow the
660 buffer and try again. */
661 if (out_avail < out_request * sizeof (gdb_wchar_t))
662 break;
663
664 ++out_request;
665 if (out_request > iter->out_size)
666 {
667 iter->out_size = out_request;
668 iter->out = xrealloc (iter->out,
669 out_request * sizeof (gdb_wchar_t));
670 }
671 continue;
672
673 case EINVAL:
674 /* Incomplete input sequence. Let the caller know, and
675 arrange for future calls to see EOF. */
676 *out_result = wchar_iterate_incomplete;
677 *ptr = iter->input;
678 *len = iter->bytes;
679 iter->bytes = 0;
680 return 0;
681
682 default:
683 perror_with_name ("Internal error while converting character sets");
684 }
685 }
686
687 /* We converted something. */
688 num = out_request - out_avail / sizeof (gdb_wchar_t);
689 *out_result = wchar_iterate_ok;
690 *out_chars = iter->out;
691 *ptr = orig_inptr;
692 *len = orig_in - iter->bytes;
693 return num;
694 }
695
696 /* Really done. */
697 *out_result = wchar_iterate_eof;
698 return -1;
699 }
700
701 \f
702 /* The charset.c module initialization function. */
703
704 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
705
706 DEF_VEC_P (char_ptr);
707
708 static VEC (char_ptr) *charsets;
709
710 #ifdef PHONY_ICONV
711
712 static void
713 find_charset_names (void)
714 {
715 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
716 VEC_safe_push (char_ptr, charsets, NULL);
717 }
718
719 #else /* PHONY_ICONV */
720
721 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
722 provides different symbols in the static and dynamic libraries.
723 So, configure may see libiconvlist but not iconvlist. But, calling
724 iconvlist is the right thing to do and will work. Hence we do a
725 check here but unconditionally call iconvlist below. */
726 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
727
728 /* A helper function that adds some character sets to the vector of
729 all character sets. This is a callback function for iconvlist. */
730
731 static int
732 add_one (unsigned int count, const char *const *names, void *data)
733 {
734 unsigned int i;
735
736 for (i = 0; i < count; ++i)
737 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
738
739 return 0;
740 }
741
742 static void
743 find_charset_names (void)
744 {
745 iconvlist (add_one, NULL);
746 VEC_safe_push (char_ptr, charsets, NULL);
747 }
748
749 #else
750
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction text.  A line containing any of
     them, at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = ignore_lines;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
        return 1;
      ++fragment;
    }

  return 0;
}
779
780 static void
781 find_charset_names (void)
782 {
783 struct pex_obj *child;
784 char *args[3];
785 int err, status;
786 int fail = 1;
787 struct gdb_environ *iconv_env;
788
789 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
790 a tty. We need to recognize it and ignore it. This text is subject
791 to translation, so force LANGUAGE=C. */
792 iconv_env = make_environ ();
793 init_environ (iconv_env);
794 set_in_environ (iconv_env, "LANGUAGE", "C");
795 set_in_environ (iconv_env, "LC_ALL", "C");
796
797 child = pex_init (0, "iconv", NULL);
798
799 args[0] = "iconv";
800 args[1] = "-l";
801 args[2] = NULL;
802 /* Note that we simply ignore errors here. */
803 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
804 "iconv", args, environ_vector (iconv_env),
805 NULL, NULL, &err))
806 {
807 FILE *in = pex_read_output (child, 0);
808
809 /* POSIX says that iconv -l uses an unspecified format. We
810 parse the glibc and libiconv formats; feel free to add others
811 as needed. */
812
813 while (!feof (in))
814 {
815 /* The size of buf is chosen arbitrarily. */
816 char buf[1024];
817 char *start, *r;
818 int len;
819
820 r = fgets (buf, sizeof (buf), in);
821 if (!r)
822 break;
823 len = strlen (r);
824 if (len <= 3)
825 continue;
826 if (ignore_line_p (r))
827 continue;
828
829 /* Strip off the newline. */
830 --len;
831 /* Strip off one or two '/'s. glibc will print lines like
832 "8859_7//", but also "10646-1:1993/UCS4/". */
833 if (buf[len - 1] == '/')
834 --len;
835 if (buf[len - 1] == '/')
836 --len;
837 buf[len] = '\0';
838
839 /* libiconv will print multiple entries per line, separated
840 by spaces. Older iconvs will print multiple entries per line,
841 indented by two spaces, and separated by ", "
842 (i.e. the human readable form). */
843 start = buf;
844 while (1)
845 {
846 int keep_going;
847 char *p;
848
849 /* Skip leading blanks. */
850 for (p = start; *p && *p == ' '; ++p)
851 ;
852 start = p;
853 /* Find the next space, comma, or end-of-line. */
854 for ( ; *p && *p != ' ' && *p != ','; ++p)
855 ;
856 /* Ignore an empty result. */
857 if (p == start)
858 break;
859 keep_going = *p;
860 *p = '\0';
861 VEC_safe_push (char_ptr, charsets, xstrdup (start));
862 if (!keep_going)
863 break;
864 /* Skip any extra spaces. */
865 for (start = p + 1; *start && *start == ' '; ++start)
866 ;
867 }
868 }
869
870 if (pex_get_status (child, 1, &status)
871 && WIFEXITED (status) && !WEXITSTATUS (status))
872 fail = 0;
873
874 }
875
876 pex_free (child);
877 free_environ (iconv_env);
878
879 if (fail)
880 {
881 /* Some error occurred, so drop the vector. */
882 int ix;
883 char *elt;
884 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
885 xfree (elt);
886 VEC_truncate (char_ptr, charsets, 0);
887 }
888 else
889 VEC_safe_push (char_ptr, charsets, NULL);
890 }
891
892 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
893 #endif /* PHONY_ICONV */
894
895 /* The "auto" target charset used by default_auto_charset. */
896 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
897
898 const char *
899 default_auto_charset (void)
900 {
901 return auto_target_charset_name;
902 }
903
904 const char *
905 default_auto_wide_charset (void)
906 {
907 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
908 }
909
910 void
911 _initialize_charset (void)
912 {
913 /* The first element is always "auto". */
914 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
915 find_charset_names ();
916
917 if (VEC_length (char_ptr, charsets) > 1)
918 charset_enum = (const char **) VEC_address (char_ptr, charsets);
919 else
920 charset_enum = default_charset_names;
921
922 #ifndef PHONY_ICONV
923 #ifdef HAVE_LANGINFO_CODESET
924 /* The result of nl_langinfo may be overwritten later. This may
925 leak a little memory, if the user later changes the host charset,
926 but that doesn't matter much. */
927 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
928 /* Solaris will return `646' here -- but the Solaris iconv then
929 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
930 which GNU libiconv doesn't like (infinite loop). */
931 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
932 auto_host_charset_name = "ASCII";
933 auto_target_charset_name = auto_host_charset_name;
934 #elif defined (USE_WIN32API)
935 {
936 static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
937
938 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
939 "CP%d", GetACP());
940 auto_host_charset_name = w32_host_default_charset;
941 auto_target_charset_name = auto_host_charset_name;
942 }
943 #endif
944 #endif
945
946 add_setshow_enum_cmd ("charset", class_support,
947 charset_enum, &host_charset_name, _("\
948 Set the host and target character sets."), _("\
949 Show the host and target character sets."), _("\
950 The `host character set' is the one used by the system GDB is running on.\n\
951 The `target character set' is the one used by the program being debugged.\n\
952 You may only use supersets of ASCII for your host character set; GDB does\n\
953 not support any others.\n\
954 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
955 /* Note that the sfunc below needs to set
956 target_charset_name, because the 'set
957 charset' command sets two variables. */
958 set_charset_sfunc,
959 show_charset,
960 &setlist, &showlist);
961
962 add_setshow_enum_cmd ("host-charset", class_support,
963 charset_enum, &host_charset_name, _("\
964 Set the host character set."), _("\
965 Show the host character set."), _("\
966 The `host character set' is the one used by the system GDB is running on.\n\
967 You may only use supersets of ASCII for your host character set; GDB does\n\
968 not support any others.\n\
969 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
970 set_host_charset_sfunc,
971 show_host_charset_name,
972 &setlist, &showlist);
973
974 add_setshow_enum_cmd ("target-charset", class_support,
975 charset_enum, &target_charset_name, _("\
976 Set the target character set."), _("\
977 Show the target character set."), _("\
978 The `target character set' is the one used by the program being debugged.\n\
979 GDB translates characters and strings between the host and target\n\
980 character sets as needed.\n\
981 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
982 set_target_charset_sfunc,
983 show_target_charset_name,
984 &setlist, &showlist);
985
986 add_setshow_enum_cmd ("target-wide-charset", class_support,
987 charset_enum, &target_wide_charset_name,
988 _("\
989 Set the target wide character set."), _("\
990 Show the target wide character set."), _("\
991 The `target wide character set' is the one used by the program being debugged.\n\
992 In particular it is the encoding used by `wchar_t'.\n\
993 GDB translates characters and strings between the host and target\n\
994 character sets as needed.\n\
995 To see a list of the character sets GDB supports, type\n\
996 `set target-wide-charset'<TAB>"),
997 set_target_wide_charset_sfunc,
998 show_target_wide_charset_name,
999 &setlist, &showlist);
1000 }
This page took 0.050359 seconds and 4 git commands to generate.