2010-11-19 Jan Kratochvil <jan.kratochvil@redhat.com>
[deliverable/binutils-gdb.git] / gdb / charset.c
1 /* Character set conversion support for GDB.
2
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 #include "defs.h"
22 #include "charset.h"
23 #include "gdbcmd.h"
24 #include "gdb_assert.h"
25 #include "gdb_obstack.h"
26 #include "gdb_wait.h"
27 #include "charset-list.h"
28 #include "vec.h"
29 #include "environ.h"
30 #include "arch-utils.h"
31
32 #include <stddef.h>
33 #include "gdb_string.h"
34 #include <ctype.h>
35
36 #ifdef USE_WIN32API
37 #include <windows.h>
38 #endif
39 \f
40 /* How GDB's character set support works
41
42 GDB has three global settings:
43
44 - The `current host character set' is the character set GDB should
45 use in talking to the user, and which (hopefully) the user's
46 terminal knows how to display properly. Most users should not
47 change this.
48
49 - The `current target character set' is the character set the
50 program being debugged uses.
51
52 - The `current target wide character set' is the wide character set
53 the program being debugged uses, that is, the encoding used for
54 wchar_t.
55
56 There are commands to set each of these, and mechanisms for
57 choosing reasonable default values. GDB has a global list of
58 character sets that it can use as its host or target character
59 sets.
60
61 The header file `charset.h' declares various functions that
62 different pieces of GDB need to perform tasks like:
63
64 - printing target strings and characters to the user's terminal
65 (mostly target->host conversions),
66
67 - building target-appropriate representations of strings and
68 characters the user enters in expressions (mostly host->target
69 conversions),
70
71 and so on.
72
73 To avoid excessive code duplication and maintenance efforts,
74 GDB simply requires a capable iconv function. Users on platforms
75 without a suitable iconv can use the GNU iconv library. */
76
77 \f
78 #ifdef PHONY_ICONV
79
80 /* Provide a phony iconv that does as little as possible. Also,
81 arrange for there to be a single available character set. */
82
/* With the phony iconv there is exactly one character set.  Make it the
   default for the host, the target and the target wide charset, and
   make it the only entry in the list of charset names.  */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Route the iconv API onto the phony_* replacements.  The conversion
   descriptor degenerates to a plain int.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#define iconv_open phony_iconv_open
#undef iconv
#define iconv phony_iconv
#undef iconv_close
#define iconv_close phony_iconv_close

/* The phony iconv takes its input buffer as `const char **'.  */
#undef ICONV_CONST
#define ICONV_CONST const
101
/* EILSEQ is missing on some systems, so supply a stand-in.  It must not
   be EINVAL: callers of `iconv' need to tell EINVAL and EILSEQ apart.
   libiconv's iconv.h makes the same choice.  wchar.h may define EILSEQ
   as well, so this has to come after wchar.h has been included, which
   happens in defs.h through gdb_wchar.h.  */
#if !defined (EILSEQ)
#define EILSEQ ENOENT
#endif
110
111 iconv_t
112 phony_iconv_open (const char *to, const char *from)
113 {
114 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
115 We allow conversions to wchar_t and the host charset. */
116 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
117 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
118 return -1;
119 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
120 return -1;
121
122 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
123 used as a flag in calls to iconv. */
124 return !strcmp (from, "UTF-32BE");
125 }
126
/* Phony replacement for iconv_close.  The phony descriptor owns no
   resources, so ARG is ignored and the call always succeeds (returns
   0).  */

int
phony_iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
132
/* Phony replacement for iconv.

   UTF_FLAG is the descriptor returned by phony_iconv_open.  When it is
   non-zero the input is UTF-32BE: each group of four input bytes is
   turned into one output byte, and code points of 256 or more cannot be
   represented.  When it is zero, input bytes are copied to the output
   unchanged.

   *INBUF, *INBYTESLEFT, *OUTBUF and *OUTBYTESLEFT are advanced past
   whatever was consumed and produced.  Returns 0 when all of the input
   was converted.  Otherwise returns (size_t) -1 with errno set to:

     EILSEQ  the next UTF-32BE character is not representable;
     E2BIG   the output buffer is full;
     EINVAL  the input ends with an incomplete UTF-32BE character.  */

size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble the big-endian code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }

          /* Never write past the end of the caller's buffer.  */
          if (*outbytesleft == 0)
            {
              errno = E2BIG;
              return (size_t) -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* One UTF-32 character is four input bytes.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Between one and three bytes left over: a truncated character.  */
      if (*inbytesleft > 0)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  /* Input remaining here means the output buffer was too small.  */
  if (*inbytesleft)
    {
      errno = E2BIG;
      return (size_t) -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
193
194 #endif
195
196
197 \f
198 /* The global lists of character sets and translations. */
199
200
/* Fallback default for the target charset, used when nothing earlier
   has defined one.  */
#if !defined (GDB_DEFAULT_TARGET_CHARSET)
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

/* Likewise for the target wide charset.  */
#if !defined (GDB_DEFAULT_TARGET_WIDE_CHARSET)
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
208
209 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
210 static const char *host_charset_name = "auto";
211 static void
212 show_host_charset_name (struct ui_file *file, int from_tty,
213 struct cmd_list_element *c,
214 const char *value)
215 {
216 if (!strcmp (value, "auto"))
217 fprintf_filtered (file,
218 _("The host character set is \"auto; currently %s\".\n"),
219 auto_host_charset_name);
220 else
221 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
222 }
223
/* The current value of the target charset setting.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  For "auto", the
   charset chosen by the current architecture is printed too.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
238
/* The current value of the target wide charset setting.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  For "auto", the
   wide charset chosen by the current architecture is printed too.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
                        _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
253
254 static const char *default_charset_names[] =
255 {
256 DEFAULT_CHARSET_NAMES
257 0
258 };
259
260 static const char **charset_enum;
261
262 \f
263 /* If the target wide character set has big- or little-endian
264 variants, these are the corresponding names. */
265 static const char *target_wide_charset_be_name;
266 static const char *target_wide_charset_le_name;
267
268 /* The architecture for which the BE- and LE-names are valid. */
269 static struct gdbarch *be_le_arch;
270
271 /* A helper function which sets the target wide big- and little-endian
272 character set names, if possible. */
273
274 static void
275 set_be_le_names (struct gdbarch *gdbarch)
276 {
277 int i, len;
278 const char *target_wide;
279
280 if (be_le_arch == gdbarch)
281 return;
282 be_le_arch = gdbarch;
283
284 target_wide_charset_le_name = NULL;
285 target_wide_charset_be_name = NULL;
286
287 target_wide = target_wide_charset_name;
288 if (!strcmp (target_wide, "auto"))
289 target_wide = gdbarch_auto_wide_charset (gdbarch);
290
291 len = strlen (target_wide);
292 for (i = 0; charset_enum[i]; ++i)
293 {
294 if (strncmp (target_wide, charset_enum[i], len))
295 continue;
296 if ((charset_enum[i][len] == 'B'
297 || charset_enum[i][len] == 'L')
298 && charset_enum[i][len + 1] == 'E'
299 && charset_enum[i][len + 2] == '\0')
300 {
301 if (charset_enum[i][len] == 'B')
302 target_wide_charset_be_name = charset_enum[i];
303 else
304 target_wide_charset_le_name = charset_enum[i];
305 }
306 }
307 }
308
309 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
310 target-wide-charset', 'set charset' sfunc's. */
311
312 static void
313 validate (struct gdbarch *gdbarch)
314 {
315 iconv_t desc;
316 const char *host_cset = host_charset ();
317 const char *target_cset = target_charset (gdbarch);
318 const char *target_wide_cset = target_wide_charset_name;
319
320 if (!strcmp (target_wide_cset, "auto"))
321 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
322
323 desc = iconv_open (target_wide_cset, host_cset);
324 if (desc == (iconv_t) -1)
325 error ("Cannot convert between character sets `%s' and `%s'",
326 target_wide_cset, host_cset);
327 iconv_close (desc);
328
329 desc = iconv_open (target_cset, host_cset);
330 if (desc == (iconv_t) -1)
331 error ("Cannot convert between character sets `%s' and `%s'",
332 target_cset, host_cset);
333 iconv_close (desc);
334
335 /* Clear the cache. */
336 be_le_arch = NULL;
337 }
338
339 /* This is the sfunc for the 'set charset' command. */
340 static void
341 set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
342 {
343 /* CAREFUL: set the target charset here as well. */
344 target_charset_name = host_charset_name;
345 validate (get_current_arch ());
346 }
347
/* The sfunc for 'set host-charset'.  It only validates the new setting;
   the wrapper exists because an sfunc must have this signature.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
356
/* The sfunc for 'set target-charset': validate the new setting.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
364
/* The sfunc for 'set target-wide-charset': validate the new setting.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
372
373 /* sfunc for the 'show charset' command. */
374 static void
375 show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
376 const char *name)
377 {
378 show_host_charset_name (file, from_tty, c, host_charset_name);
379 show_target_charset_name (file, from_tty, c, target_charset_name);
380 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
381 }
382
383 \f
384 /* Accessor functions. */
385
386 const char *
387 host_charset (void)
388 {
389 if (!strcmp (host_charset_name, "auto"))
390 return auto_host_charset_name;
391 return host_charset_name;
392 }
393
394 const char *
395 target_charset (struct gdbarch *gdbarch)
396 {
397 if (!strcmp (target_charset_name, "auto"))
398 return gdbarch_auto_charset (gdbarch);
399 return target_charset_name;
400 }
401
402 const char *
403 target_wide_charset (struct gdbarch *gdbarch)
404 {
405 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
406
407 set_be_le_names (gdbarch);
408 if (byte_order == BFD_ENDIAN_BIG)
409 {
410 if (target_wide_charset_be_name)
411 return target_wide_charset_be_name;
412 }
413 else
414 {
415 if (target_wide_charset_le_name)
416 return target_wide_charset_le_name;
417 }
418
419 if (!strcmp (target_wide_charset_name, "auto"))
420 return gdbarch_auto_wide_charset (gdbarch);
421
422 return target_wide_charset_name;
423 }
424
425 \f
426 /* Host character set management. For the time being, we assume that
427 the host character set is some superset of ASCII. */
428
/* Return the control character that corresponds to the host character
   C: DEL (0177) for '?', otherwise C masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
436
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything else trips the assertion
   below.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for EOF and values
     representable as unsigned char, so do not pass a plain char that
     may be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
450
451 \f
452 /* Public character management functions. */
453
/* Cleanup callback: P points at an iconv descriptor, which is closed.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
462
463 void
464 convert_between_encodings (const char *from, const char *to,
465 const gdb_byte *bytes, unsigned int num_bytes,
466 int width, struct obstack *output,
467 enum transliterations translit)
468 {
469 iconv_t desc;
470 struct cleanup *cleanups;
471 size_t inleft;
472 char *inp;
473 unsigned int space_request;
474
475 /* Often, the host and target charsets will be the same. */
476 if (!strcmp (from, to))
477 {
478 obstack_grow (output, bytes, num_bytes);
479 return;
480 }
481
482 desc = iconv_open (to, from);
483 if (desc == (iconv_t) -1)
484 perror_with_name ("Converting character sets");
485 cleanups = make_cleanup (cleanup_iconv, &desc);
486
487 inleft = num_bytes;
488 inp = (char *) bytes;
489
490 space_request = num_bytes;
491
492 while (inleft > 0)
493 {
494 char *outp;
495 size_t outleft, r;
496 int old_size;
497
498 old_size = obstack_object_size (output);
499 obstack_blank (output, space_request);
500
501 outp = obstack_base (output) + old_size;
502 outleft = space_request;
503
504 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
505
506 /* Now make sure that the object on the obstack only includes
507 bytes we have converted. */
508 obstack_blank (output, - (int) outleft);
509
510 if (r == (size_t) -1)
511 {
512 switch (errno)
513 {
514 case EILSEQ:
515 {
516 int i;
517
518 /* Invalid input sequence. */
519 if (translit == translit_none)
520 error (_("Could not convert character to `%s' character set"),
521 to);
522
523 /* We emit escape sequence for the bytes, skip them,
524 and try again. */
525 for (i = 0; i < width; ++i)
526 {
527 char octal[5];
528
529 sprintf (octal, "\\%.3o", *inp & 0xff);
530 obstack_grow_str (output, octal);
531
532 ++inp;
533 --inleft;
534 }
535 }
536 break;
537
538 case E2BIG:
539 /* We ran out of space in the output buffer. Make it
540 bigger next time around. */
541 space_request *= 2;
542 break;
543
544 case EINVAL:
545 /* Incomplete input sequence. FIXME: ought to report this
546 to the caller somehow. */
547 inleft = 0;
548 break;
549
550 default:
551 perror_with_name ("Internal error while converting character sets");
552 }
553 }
554 }
555
556 do_cleanups (cleanups);
557 }
558
559 \f
560
561 /* An iterator that returns host wchar_t's from a target string. */
562 struct wchar_iterator
563 {
564 /* The underlying iconv descriptor. */
565 iconv_t desc;
566
567 /* The input string. This is updated as convert characters. */
568 char *input;
569 /* The number of bytes remaining in the input. */
570 size_t bytes;
571
572 /* The width of an input character. */
573 size_t width;
574
575 /* The output buffer and its size. */
576 gdb_wchar_t *out;
577 size_t out_size;
578 };
579
580 /* Create a new iterator. */
581 struct wchar_iterator *
582 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
583 size_t width)
584 {
585 struct wchar_iterator *result;
586 iconv_t desc;
587
588 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
589 if (desc == (iconv_t) -1)
590 perror_with_name ("Converting character sets");
591
592 result = XNEW (struct wchar_iterator);
593 result->desc = desc;
594 result->input = (char *) input;
595 result->bytes = bytes;
596 result->width = width;
597
598 result->out = XNEW (gdb_wchar_t);
599 result->out_size = 1;
600
601 return result;
602 }
603
604 static void
605 do_cleanup_iterator (void *p)
606 {
607 struct wchar_iterator *iter = p;
608
609 iconv_close (iter->desc);
610 xfree (iter->out);
611 xfree (iter);
612 }
613
614 struct cleanup *
615 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
616 {
617 return make_cleanup (do_cleanup_iterator, iter);
618 }
619
620 int
621 wchar_iterate (struct wchar_iterator *iter,
622 enum wchar_iterate_result *out_result,
623 gdb_wchar_t **out_chars,
624 const gdb_byte **ptr,
625 size_t *len)
626 {
627 size_t out_request;
628
629 /* Try to convert some characters. At first we try to convert just
630 a single character. The reason for this is that iconv does not
631 necessarily update its outgoing arguments when it encounters an
632 invalid input sequence -- but we want to reliably report this to
633 our caller so it can emit an escape sequence. */
634 out_request = 1;
635 while (iter->bytes > 0)
636 {
637 char *outptr = (char *) &iter->out[0];
638 char *orig_inptr = iter->input;
639 size_t orig_in = iter->bytes;
640 size_t out_avail = out_request * sizeof (gdb_wchar_t);
641 size_t num;
642 size_t r = iconv (iter->desc,
643 (ICONV_CONST char **) &iter->input, &iter->bytes,
644 &outptr, &out_avail);
645
646 if (r == (size_t) -1)
647 {
648 switch (errno)
649 {
650 case EILSEQ:
651 /* Invalid input sequence. We still might have converted a
652 character; if so, return it. */
653 if (out_avail < out_request * sizeof (gdb_wchar_t))
654 break;
655
656 /* Otherwise skip the first invalid character, and let the
657 caller know about it. */
658 *out_result = wchar_iterate_invalid;
659 *ptr = iter->input;
660 *len = iter->width;
661 iter->input += iter->width;
662 iter->bytes -= iter->width;
663 return 0;
664
665 case E2BIG:
666 /* We ran out of space. We still might have converted a
667 character; if so, return it. Otherwise, grow the
668 buffer and try again. */
669 if (out_avail < out_request * sizeof (gdb_wchar_t))
670 break;
671
672 ++out_request;
673 if (out_request > iter->out_size)
674 {
675 iter->out_size = out_request;
676 iter->out = xrealloc (iter->out,
677 out_request * sizeof (gdb_wchar_t));
678 }
679 continue;
680
681 case EINVAL:
682 /* Incomplete input sequence. Let the caller know, and
683 arrange for future calls to see EOF. */
684 *out_result = wchar_iterate_incomplete;
685 *ptr = iter->input;
686 *len = iter->bytes;
687 iter->bytes = 0;
688 return 0;
689
690 default:
691 perror_with_name ("Internal error while converting character sets");
692 }
693 }
694
695 /* We converted something. */
696 num = out_request - out_avail / sizeof (gdb_wchar_t);
697 *out_result = wchar_iterate_ok;
698 *out_chars = iter->out;
699 *ptr = orig_inptr;
700 *len = orig_in - iter->bytes;
701 return num;
702 }
703
704 /* Really done. */
705 *out_result = wchar_iterate_eof;
706 return -1;
707 }
708
709 \f
710 /* The charset.c module initialization function. */
711
712 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
713
714 DEF_VEC_P (char_ptr);
715
716 static VEC (char_ptr) *charsets;
717
718 #ifdef PHONY_ICONV
719
720 static void
721 find_charset_names (void)
722 {
723 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
724 VEC_safe_push (char_ptr, charsets, NULL);
725 }
726
727 #else /* PHONY_ICONV */
728
729 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
730 provides different symbols in the static and dynamic libraries.
731 So, configure may see libiconvlist but not iconvlist. But, calling
732 iconvlist is the right thing to do and will work. Hence we do a
733 check here but unconditionally call iconvlist below. */
734 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
735
736 /* A helper function that adds some character sets to the vector of
737 all character sets. This is a callback function for iconvlist. */
738
739 static int
740 add_one (unsigned int count, const char *const *names, void *data)
741 {
742 unsigned int i;
743
744 for (i = 0; i < count; ++i)
745 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
746
747 return 0;
748 }
749
750 static void
751 find_charset_names (void)
752 {
753 iconvlist (add_one, NULL);
754 VEC_safe_push (char_ptr, charsets, NULL);
755 }
756
757 #else
758
/* Return non-zero if LINE, a line of output from iconv, belongs to the
   human readable introduction and must be ignored.  Older iconv
   programs (e.g. 2.2.2) print the introduction even when stdout is not
   a tty; newer versions leave it out in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line that contains any one of
     them, at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = intro_fragments;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
        return 1;
      ++fragment;
    }

  return 0;
}
787
788 static void
789 find_charset_names (void)
790 {
791 struct pex_obj *child;
792 char *args[3];
793 int err, status;
794 int fail = 1;
795 struct gdb_environ *iconv_env;
796
797 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
798 a tty. We need to recognize it and ignore it. This text is subject
799 to translation, so force LANGUAGE=C. */
800 iconv_env = make_environ ();
801 init_environ (iconv_env);
802 set_in_environ (iconv_env, "LANGUAGE", "C");
803 set_in_environ (iconv_env, "LC_ALL", "C");
804
805 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
806
807 args[0] = "iconv";
808 args[1] = "-l";
809 args[2] = NULL;
810 /* Note that we simply ignore errors here. */
811 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
812 "iconv", args, environ_vector (iconv_env),
813 NULL, NULL, &err))
814 {
815 FILE *in = pex_read_output (child, 0);
816
817 /* POSIX says that iconv -l uses an unspecified format. We
818 parse the glibc and libiconv formats; feel free to add others
819 as needed. */
820
821 while (!feof (in))
822 {
823 /* The size of buf is chosen arbitrarily. */
824 char buf[1024];
825 char *start, *r;
826 int len;
827
828 r = fgets (buf, sizeof (buf), in);
829 if (!r)
830 break;
831 len = strlen (r);
832 if (len <= 3)
833 continue;
834 if (ignore_line_p (r))
835 continue;
836
837 /* Strip off the newline. */
838 --len;
839 /* Strip off one or two '/'s. glibc will print lines like
840 "8859_7//", but also "10646-1:1993/UCS4/". */
841 if (buf[len - 1] == '/')
842 --len;
843 if (buf[len - 1] == '/')
844 --len;
845 buf[len] = '\0';
846
847 /* libiconv will print multiple entries per line, separated
848 by spaces. Older iconvs will print multiple entries per line,
849 indented by two spaces, and separated by ", "
850 (i.e. the human readable form). */
851 start = buf;
852 while (1)
853 {
854 int keep_going;
855 char *p;
856
857 /* Skip leading blanks. */
858 for (p = start; *p && *p == ' '; ++p)
859 ;
860 start = p;
861 /* Find the next space, comma, or end-of-line. */
862 for ( ; *p && *p != ' ' && *p != ','; ++p)
863 ;
864 /* Ignore an empty result. */
865 if (p == start)
866 break;
867 keep_going = *p;
868 *p = '\0';
869 VEC_safe_push (char_ptr, charsets, xstrdup (start));
870 if (!keep_going)
871 break;
872 /* Skip any extra spaces. */
873 for (start = p + 1; *start && *start == ' '; ++start)
874 ;
875 }
876 }
877
878 if (pex_get_status (child, 1, &status)
879 && WIFEXITED (status) && !WEXITSTATUS (status))
880 fail = 0;
881
882 }
883
884 pex_free (child);
885 free_environ (iconv_env);
886
887 if (fail)
888 {
889 /* Some error occurred, so drop the vector. */
890 int ix;
891 char *elt;
892 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
893 xfree (elt);
894 VEC_truncate (char_ptr, charsets, 0);
895 }
896 else
897 VEC_safe_push (char_ptr, charsets, NULL);
898 }
899
900 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
901 #endif /* PHONY_ICONV */
902
903 /* The "auto" target charset used by default_auto_charset. */
904 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
905
906 const char *
907 default_auto_charset (void)
908 {
909 return auto_target_charset_name;
910 }
911
912 const char *
913 default_auto_wide_charset (void)
914 {
915 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
916 }
917
918 void
919 _initialize_charset (void)
920 {
921 /* The first element is always "auto". */
922 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
923 find_charset_names ();
924
925 if (VEC_length (char_ptr, charsets) > 1)
926 charset_enum = (const char **) VEC_address (char_ptr, charsets);
927 else
928 charset_enum = default_charset_names;
929
930 #ifndef PHONY_ICONV
931 #ifdef HAVE_LANGINFO_CODESET
932 /* The result of nl_langinfo may be overwritten later. This may
933 leak a little memory, if the user later changes the host charset,
934 but that doesn't matter much. */
935 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
936 /* Solaris will return `646' here -- but the Solaris iconv then
937 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
938 which GNU libiconv doesn't like (infinite loop). */
939 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
940 auto_host_charset_name = "ASCII";
941 auto_target_charset_name = auto_host_charset_name;
942 #elif defined (USE_WIN32API)
943 {
944 static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
945
946 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
947 "CP%d", GetACP());
948 auto_host_charset_name = w32_host_default_charset;
949 auto_target_charset_name = auto_host_charset_name;
950 }
951 #endif
952 #endif
953
954 add_setshow_enum_cmd ("charset", class_support,
955 charset_enum, &host_charset_name, _("\
956 Set the host and target character sets."), _("\
957 Show the host and target character sets."), _("\
958 The `host character set' is the one used by the system GDB is running on.\n\
959 The `target character set' is the one used by the program being debugged.\n\
960 You may only use supersets of ASCII for your host character set; GDB does\n\
961 not support any others.\n\
962 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
963 /* Note that the sfunc below needs to set
964 target_charset_name, because the 'set
965 charset' command sets two variables. */
966 set_charset_sfunc,
967 show_charset,
968 &setlist, &showlist);
969
970 add_setshow_enum_cmd ("host-charset", class_support,
971 charset_enum, &host_charset_name, _("\
972 Set the host character set."), _("\
973 Show the host character set."), _("\
974 The `host character set' is the one used by the system GDB is running on.\n\
975 You may only use supersets of ASCII for your host character set; GDB does\n\
976 not support any others.\n\
977 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
978 set_host_charset_sfunc,
979 show_host_charset_name,
980 &setlist, &showlist);
981
982 add_setshow_enum_cmd ("target-charset", class_support,
983 charset_enum, &target_charset_name, _("\
984 Set the target character set."), _("\
985 Show the target character set."), _("\
986 The `target character set' is the one used by the program being debugged.\n\
987 GDB translates characters and strings between the host and target\n\
988 character sets as needed.\n\
989 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
990 set_target_charset_sfunc,
991 show_target_charset_name,
992 &setlist, &showlist);
993
994 add_setshow_enum_cmd ("target-wide-charset", class_support,
995 charset_enum, &target_wide_charset_name,
996 _("\
997 Set the target wide character set."), _("\
998 Show the target wide character set."), _("\
999 The `target wide character set' is the one used by the program being debugged.\n\
1000 In particular it is the encoding used by `wchar_t'.\n\
1001 GDB translates characters and strings between the host and target\n\
1002 character sets as needed.\n\
1003 To see a list of the character sets GDB supports, type\n\
1004 `set target-wide-charset'<TAB>"),
1005 set_target_wide_charset_sfunc,
1006 show_target_wide_charset_name,
1007 &setlist, &showlist);
1008 }
This page took 0.050453 seconds and 4 git commands to generate.