* charset.c (iconv_open): New define.
[deliverable/binutils-gdb.git] / gdb / charset.c
1 /* Character set conversion support for GDB.
2
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 #include "defs.h"
22 #include "charset.h"
23 #include "gdbcmd.h"
24 #include "gdb_assert.h"
25 #include "gdb_obstack.h"
26 #include "gdb_wait.h"
27 #include "charset-list.h"
28 #include "vec.h"
29 #include "environ.h"
30 #include "arch-utils.h"
31
32 #include <stddef.h>
33 #include "gdb_string.h"
34 #include <ctype.h>
35
36 #ifdef USE_WIN32API
37 #include <windows.h>
38 #endif
39 \f
40 /* How GDB's character set support works
41
42 GDB has three global settings:
43
44 - The `current host character set' is the character set GDB should
45 use in talking to the user, and which (hopefully) the user's
46 terminal knows how to display properly. Most users should not
47 change this.
48
49 - The `current target character set' is the character set the
50 program being debugged uses.
51
52 - The `current target wide character set' is the wide character set
53 the program being debugged uses, that is, the encoding used for
54 wchar_t.
55
56 There are commands to set each of these, and mechanisms for
57 choosing reasonable default values. GDB has a global list of
58 character sets that it can use as its host or target character
59 sets.
60
61 The header file `charset.h' declares various functions that
62 different pieces of GDB need to perform tasks like:
63
64 - printing target strings and characters to the user's terminal
65 (mostly target->host conversions),
66
67 - building target-appropriate representations of strings and
68 characters the user enters in expressions (mostly host->target
69 conversions),
70
71 and so on.
72
73 To avoid excessive code duplication and maintenance efforts,
74 GDB simply requires a capable iconv function. Users on platforms
75 without a suitable iconv can use the GNU iconv library. */
76
77 \f
78 #ifdef PHONY_ICONV
79
80 /* Provide a phony iconv that does as little as possible. Also,
81 arrange for there to be a single available character set. */
82
83 #undef GDB_DEFAULT_HOST_CHARSET
84 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
85 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
86 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
87 #undef DEFAULT_CHARSET_NAMES
88 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
89
90 #undef iconv_t
91 #define iconv_t int
92 #undef iconv_open
93 #define iconv_open phony_iconv_open
94 #undef iconv
95 #define iconv phony_iconv
96 #undef iconv_close
97 #define iconv_close phony_iconv_close
98
99 #undef ICONV_CONST
100 #define ICONV_CONST const
101
102 /* Some systems don't have EILSEQ, so we define it here, but not as
103 EINVAL, because callers of `iconv' want to distinguish EINVAL and
104 EILSEQ. This is what iconv.h from libiconv does as well. Note
105 that wchar.h may also define EILSEQ, so this needs to be after we
106 include wchar.h, which happens in defs.h through gdb_wchar.h. */
107 #ifndef EILSEQ
108 #define EILSEQ ENOENT
109 #endif
110
111 iconv_t
112 phony_iconv_open (const char *to, const char *from)
113 {
114 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
115 We allow conversions to wchar_t and the host charset. */
116 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
117 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
118 return -1;
119 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
120 return -1;
121
122 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
123 used as a flag in calls to iconv. */
124 return !strcmp (from, "UTF-32BE");
125 }
126
/* Close a phony descriptor.  There is nothing to release, so ARG is
   ignored and the call always reports success by returning 0.  */

int
phony_iconv_close (iconv_t arg)
{
  const int success = 0;

  (void) arg;
  return success;
}
132
/* Phony replacement for iconv.

   UTF_FLAG is the value produced by phony_iconv_open: non-zero means
   the input is UTF-32BE and each 4-byte unit is narrowed to a single
   output byte; zero means input bytes are copied to the output
   unchanged.  *INBUF / *INBYTESLEFT and *OUTBUF / *OUTBYTESLEFT are
   advanced past whatever was consumed and produced.

   Returns 0 on success.  On failure returns (size_t) -1 with errno
   set to EILSEQ (a UTF-32BE value of 256 or more), E2BIG (the output
   buffer is full while input remains) or EINVAL (the UTF-32BE input
   ends with a partial 4-byte unit).  */

size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit value.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return -1;
            }
          /* Never store past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return -1;
            }
          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* A whole 4-byte unit was consumed.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }
      /* Only leftover bytes mean a partial sequence; an empty
         remainder is a complete, successful conversion.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  if (*inbytesleft)
    {
      errno = E2BIG;
      return -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
193
194 #endif
195
196
197 \f
198 /* The global lists of character sets and translations. */
199
200
201 #ifndef GDB_DEFAULT_TARGET_CHARSET
202 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
203 #endif
204
205 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
206 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
207 #endif
208
209 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
210 static const char *host_charset_name = "auto";
211 static void
212 show_host_charset_name (struct ui_file *file, int from_tty,
213 struct cmd_list_element *c,
214 const char *value)
215 {
216 if (!strcmp (value, "auto"))
217 fprintf_filtered (file,
218 _("The host character set is \"auto; currently %s\".\n"),
219 auto_host_charset_name);
220 else
221 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
222 }
223
224 static const char *target_charset_name = "auto";
/* Show callback for the target character set.  When VALUE is "auto"
   the message also names the charset reported by
   gdbarch_auto_charset for the current architecture.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
238
239 static const char *target_wide_charset_name = "auto";
/* Show callback for the target wide character set.  When VALUE is
   "auto" the message also names the charset reported by
   gdbarch_auto_wide_charset for the current architecture.  */

static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
253
254 static const char *default_charset_names[] =
255 {
256 DEFAULT_CHARSET_NAMES
257 0
258 };
259
260 static const char **charset_enum;
261
262 \f
263 /* If the target wide character set has big- or little-endian
264 variants, these are the corresponding names. */
265 static const char *target_wide_charset_be_name;
266 static const char *target_wide_charset_le_name;
267
268 /* The architecture for which the BE- and LE-names are valid. */
269 static struct gdbarch *be_le_arch;
270
271 /* A helper function which sets the target wide big- and little-endian
272 character set names, if possible. */
273
274 static void
275 set_be_le_names (struct gdbarch *gdbarch)
276 {
277 int i, len;
278 const char *target_wide;
279
280 if (be_le_arch == gdbarch)
281 return;
282 be_le_arch = gdbarch;
283
284 target_wide_charset_le_name = NULL;
285 target_wide_charset_be_name = NULL;
286
287 target_wide = target_wide_charset_name;
288 if (!strcmp (target_wide, "auto"))
289 target_wide = gdbarch_auto_wide_charset (gdbarch);
290
291 len = strlen (target_wide);
292 for (i = 0; charset_enum[i]; ++i)
293 {
294 if (strncmp (target_wide, charset_enum[i], len))
295 continue;
296 if ((charset_enum[i][len] == 'B'
297 || charset_enum[i][len] == 'L')
298 && charset_enum[i][len + 1] == 'E'
299 && charset_enum[i][len + 2] == '\0')
300 {
301 if (charset_enum[i][len] == 'B')
302 target_wide_charset_be_name = charset_enum[i];
303 else
304 target_wide_charset_le_name = charset_enum[i];
305 }
306 }
307 }
308
309 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
310 target-wide-charset', 'set charset' sfunc's. */
311
312 static void
313 validate (struct gdbarch *gdbarch)
314 {
315 iconv_t desc;
316 const char *host_cset = host_charset ();
317 const char *target_cset = target_charset (gdbarch);
318 const char *target_wide_cset = target_wide_charset_name;
319
320 if (!strcmp (target_wide_cset, "auto"))
321 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
322
323 desc = iconv_open (target_wide_cset, host_cset);
324 if (desc == (iconv_t) -1)
325 error ("Cannot convert between character sets `%s' and `%s'",
326 target_wide_cset, host_cset);
327 iconv_close (desc);
328
329 desc = iconv_open (target_cset, host_cset);
330 if (desc == (iconv_t) -1)
331 error ("Cannot convert between character sets `%s' and `%s'",
332 target_cset, host_cset);
333 iconv_close (desc);
334
335 /* Clear the cache. */
336 be_le_arch = NULL;
337 }
338
339 /* This is the sfunc for the 'set charset' command. */
340 static void
341 set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
342 {
343 /* CAREFUL: set the target charset here as well. */
344 target_charset_name = host_charset_name;
345 validate (get_current_arch ());
346 }
347
/* sfunc for 'set host-charset'.  It exists only to give validate the
   signature the command machinery requires; all arguments are
   ignored.  */

static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
356
/* sfunc for 'set target-charset': validates the charset settings
   against the current architecture.  All arguments are ignored.  */

static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
364
/* sfunc for 'set target-wide-charset': validates the charset settings
   against the current architecture.  All arguments are ignored.  */

static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
372
373 /* sfunc for the 'show charset' command. */
374 static void
375 show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
376 const char *name)
377 {
378 show_host_charset_name (file, from_tty, c, host_charset_name);
379 show_target_charset_name (file, from_tty, c, target_charset_name);
380 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
381 }
382
383 \f
384 /* Accessor functions. */
385
386 const char *
387 host_charset (void)
388 {
389 if (!strcmp (host_charset_name, "auto"))
390 return auto_host_charset_name;
391 return host_charset_name;
392 }
393
394 const char *
395 target_charset (struct gdbarch *gdbarch)
396 {
397 if (!strcmp (target_charset_name, "auto"))
398 return gdbarch_auto_charset (gdbarch);
399 return target_charset_name;
400 }
401
402 const char *
403 target_wide_charset (struct gdbarch *gdbarch)
404 {
405 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
406
407 set_be_le_names (gdbarch);
408 if (byte_order == BFD_ENDIAN_BIG)
409 {
410 if (target_wide_charset_be_name)
411 return target_wide_charset_be_name;
412 }
413 else
414 {
415 if (target_wide_charset_le_name)
416 return target_wide_charset_le_name;
417 }
418
419 if (!strcmp (target_wide_charset_name, "auto"))
420 return gdbarch_auto_wide_charset (gdbarch);
421
422 return target_wide_charset_name;
423 }
424
425 \f
426 /* Host character set management. For the time being, we assume that
427 the host character set is some superset of ASCII. */
428
/* Map the host character C to its control character: '?' becomes
   0177, anything else is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
436
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; any character that is not a decimal
   digit or 'a'..'f' is asserted to lie in 'A'..'F'.

   The decimal digits are recognised with a plain range comparison
   rather than isdigit, because passing a negative char to a <ctype.h>
   function is undefined.  */

int
host_hex_value (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
450
451 \f
452 /* Public character management functions. */
453
/* Cleanup callback: P points at an iconv_t, which is closed.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
462
463 void
464 convert_between_encodings (const char *from, const char *to,
465 const gdb_byte *bytes, unsigned int num_bytes,
466 int width, struct obstack *output,
467 enum transliterations translit)
468 {
469 iconv_t desc;
470 struct cleanup *cleanups;
471 size_t inleft;
472 char *inp;
473 unsigned int space_request;
474
475 /* Often, the host and target charsets will be the same. */
476 if (!strcmp (from, to))
477 {
478 obstack_grow (output, bytes, num_bytes);
479 return;
480 }
481
482 desc = iconv_open (to, from);
483 if (desc == (iconv_t) -1)
484 perror_with_name ("Converting character sets");
485 cleanups = make_cleanup (cleanup_iconv, &desc);
486
487 inleft = num_bytes;
488 inp = (char *) bytes;
489
490 space_request = num_bytes;
491
492 while (inleft > 0)
493 {
494 char *outp;
495 size_t outleft, r;
496 int old_size;
497
498 old_size = obstack_object_size (output);
499 obstack_blank (output, space_request);
500
501 outp = obstack_base (output) + old_size;
502 outleft = space_request;
503
504 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
505
506 /* Now make sure that the object on the obstack only includes
507 bytes we have converted. */
508 obstack_blank (output, - (int) outleft);
509
510 if (r == (size_t) -1)
511 {
512 switch (errno)
513 {
514 case EILSEQ:
515 {
516 int i;
517
518 /* Invalid input sequence. */
519 if (translit == translit_none)
520 error (_("Could not convert character to `%s' character set"),
521 to);
522
523 /* We emit escape sequence for the bytes, skip them,
524 and try again. */
525 for (i = 0; i < width; ++i)
526 {
527 char octal[5];
528
529 sprintf (octal, "\\%.3o", *inp & 0xff);
530 obstack_grow_str (output, octal);
531
532 ++inp;
533 --inleft;
534 }
535 }
536 break;
537
538 case E2BIG:
539 /* We ran out of space in the output buffer. Make it
540 bigger next time around. */
541 space_request *= 2;
542 break;
543
544 case EINVAL:
545 /* Incomplete input sequence. FIXME: ought to report this
546 to the caller somehow. */
547 inleft = 0;
548 break;
549
550 default:
551 perror_with_name ("Internal error while converting character sets");
552 }
553 }
554 }
555
556 do_cleanups (cleanups);
557 }
558
559 \f
560
561 /* An iterator that returns host wchar_t's from a target string. */
562 struct wchar_iterator
563 {
564 /* The underlying iconv descriptor. */
565 iconv_t desc;
566
567 /* The input string. This is updated as we convert characters. */
568 char *input;
569 /* The number of bytes remaining in the input. */
570 size_t bytes;
571
572 /* The width of an input character, in bytes. */
573 size_t width;
574
575 /* The output buffer and the number of gdb_wchar_t elements it can hold. */
576 gdb_wchar_t *out;
577 size_t out_size;
578 };
579
580 /* Create a new iterator. */
581 struct wchar_iterator *
582 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
583 size_t width)
584 {
585 struct wchar_iterator *result;
586 iconv_t desc;
587
588 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
589 if (desc == (iconv_t) -1)
590 perror_with_name ("Converting character sets");
591
592 result = XNEW (struct wchar_iterator);
593 result->desc = desc;
594 result->input = (char *) input;
595 result->bytes = bytes;
596 result->width = width;
597
598 result->out = XNEW (gdb_wchar_t);
599 result->out_size = 1;
600
601 return result;
602 }
603
604 static void
605 do_cleanup_iterator (void *p)
606 {
607 struct wchar_iterator *iter = p;
608
609 iconv_close (iter->desc);
610 xfree (iter->out);
611 xfree (iter);
612 }
613
614 struct cleanup *
615 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
616 {
617 return make_cleanup (do_cleanup_iterator, iter);
618 }
619
620 int
621 wchar_iterate (struct wchar_iterator *iter,
622 enum wchar_iterate_result *out_result,
623 gdb_wchar_t **out_chars,
624 const gdb_byte **ptr,
625 size_t *len)
626 {
627 size_t out_request;
628
629 /* Try to convert some characters. At first we try to convert just
630 a single character. The reason for this is that iconv does not
631 necessarily update its outgoing arguments when it encounters an
632 invalid input sequence -- but we want to reliably report this to
633 our caller so it can emit an escape sequence. */
634 out_request = 1;
635 while (iter->bytes > 0)
636 {
637 char *outptr = (char *) &iter->out[0];
638 char *orig_inptr = iter->input;
639 size_t orig_in = iter->bytes;
640 size_t out_avail = out_request * sizeof (gdb_wchar_t);
641 size_t num;
642 size_t r = iconv (iter->desc,
643 (ICONV_CONST char **) &iter->input, &iter->bytes,
644 &outptr, &out_avail);
645
646 if (r == (size_t) -1)
647 {
648 switch (errno)
649 {
650 case EILSEQ:
651 /* Invalid input sequence. Skip it, and let the caller
652 know about it. */
653 *out_result = wchar_iterate_invalid;
654 *ptr = iter->input;
655 *len = iter->width;
656 iter->input += iter->width;
657 iter->bytes -= iter->width;
658 return 0;
659
660 case E2BIG:
661 /* We ran out of space. We still might have converted a
662 character; if so, return it. Otherwise, grow the
663 buffer and try again. */
664 if (out_avail < out_request * sizeof (gdb_wchar_t))
665 break;
666
667 ++out_request;
668 if (out_request > iter->out_size)
669 {
670 iter->out_size = out_request;
671 iter->out = xrealloc (iter->out,
672 out_request * sizeof (gdb_wchar_t));
673 }
674 continue;
675
676 case EINVAL:
677 /* Incomplete input sequence. Let the caller know, and
678 arrange for future calls to see EOF. */
679 *out_result = wchar_iterate_incomplete;
680 *ptr = iter->input;
681 *len = iter->bytes;
682 iter->bytes = 0;
683 return 0;
684
685 default:
686 perror_with_name ("Internal error while converting character sets");
687 }
688 }
689
690 /* We converted something. */
691 num = out_request - out_avail / sizeof (gdb_wchar_t);
692 *out_result = wchar_iterate_ok;
693 *out_chars = iter->out;
694 *ptr = orig_inptr;
695 *len = orig_in - iter->bytes;
696 return num;
697 }
698
699 /* Really done. */
700 *out_result = wchar_iterate_eof;
701 return -1;
702 }
703
704 \f
705 /* The charset.c module initialization function. */
706
707 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
708
709 DEF_VEC_P (char_ptr);
710
711 static VEC (char_ptr) *charsets;
712
713 #ifdef PHONY_ICONV
714
715 static void
716 find_charset_names (void)
717 {
718 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
719 VEC_safe_push (char_ptr, charsets, NULL);
720 }
721
722 #else /* PHONY_ICONV */
723
724 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
725 provides different symbols in the static and dynamic libraries.
726 So, configure may see libiconvlist but not iconvlist. But, calling
727 iconvlist is the right thing to do and will work. Hence we do a
728 check here but unconditionally call iconvlist below. */
729 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
730
731 /* A helper function that adds some character sets to the vector of
732 all character sets. This is a callback function for iconvlist. */
733
734 static int
735 add_one (unsigned int count, const char *const *names, void *data)
736 {
737 unsigned int i;
738
739 for (i = 0; i < count; ++i)
740 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
741
742 return 0;
743 }
744
745 static void
746 find_charset_names (void)
747 {
748 iconvlist (add_one, NULL);
749 VEC_safe_push (char_ptr, charsets, NULL);
750 }
751
752 #else
753
/* Return 1 if LINE (output from iconv) is part of the human readable
   introduction and should be ignored, 0 otherwise.  Older iconv
   programs (e.g. 2.2.2) print that introduction even when stdout is
   not a tty; newer versions omit it in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several"
    };
  size_t count = sizeof intro_fragments / sizeof intro_fragments[0];
  size_t k;

  for (k = 0; k < count; ++k)
    if (strstr (line, intro_fragments[k]) != NULL)
      return 1;

  return 0;
}
782
783 static void
784 find_charset_names (void)
785 {
786 struct pex_obj *child;
787 char *args[3];
788 int err, status;
789 int fail = 1;
790 struct gdb_environ *iconv_env;
791
792 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
793 a tty. We need to recognize it and ignore it. This text is subject
794 to translation, so force LANGUAGE=C. */
795 iconv_env = make_environ ();
796 init_environ (iconv_env);
797 set_in_environ (iconv_env, "LANGUAGE", "C");
798 set_in_environ (iconv_env, "LC_ALL", "C");
799
800 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
801
802 args[0] = "iconv";
803 args[1] = "-l";
804 args[2] = NULL;
805 /* Note that we simply ignore errors here. */
806 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
807 "iconv", args, environ_vector (iconv_env),
808 NULL, NULL, &err))
809 {
810 FILE *in = pex_read_output (child, 0);
811
812 /* POSIX says that iconv -l uses an unspecified format. We
813 parse the glibc and libiconv formats; feel free to add others
814 as needed. */
815
816 while (!feof (in))
817 {
818 /* The size of buf is chosen arbitrarily. */
819 char buf[1024];
820 char *start, *r;
821 int len;
822
823 r = fgets (buf, sizeof (buf), in);
824 if (!r)
825 break;
826 len = strlen (r);
827 if (len <= 3)
828 continue;
829 if (ignore_line_p (r))
830 continue;
831
832 /* Strip off the newline. */
833 --len;
834 /* Strip off one or two '/'s. glibc will print lines like
835 "8859_7//", but also "10646-1:1993/UCS4/". */
836 if (buf[len - 1] == '/')
837 --len;
838 if (buf[len - 1] == '/')
839 --len;
840 buf[len] = '\0';
841
842 /* libiconv will print multiple entries per line, separated
843 by spaces. Older iconvs will print multiple entries per line,
844 indented by two spaces, and separated by ", "
845 (i.e. the human readable form). */
846 start = buf;
847 while (1)
848 {
849 int keep_going;
850 char *p;
851
852 /* Skip leading blanks. */
853 for (p = start; *p && *p == ' '; ++p)
854 ;
855 start = p;
856 /* Find the next space, comma, or end-of-line. */
857 for ( ; *p && *p != ' ' && *p != ','; ++p)
858 ;
859 /* Ignore an empty result. */
860 if (p == start)
861 break;
862 keep_going = *p;
863 *p = '\0';
864 VEC_safe_push (char_ptr, charsets, xstrdup (start));
865 if (!keep_going)
866 break;
867 /* Skip any extra spaces. */
868 for (start = p + 1; *start && *start == ' '; ++start)
869 ;
870 }
871 }
872
873 if (pex_get_status (child, 1, &status)
874 && WIFEXITED (status) && !WEXITSTATUS (status))
875 fail = 0;
876
877 }
878
879 pex_free (child);
880 free_environ (iconv_env);
881
882 if (fail)
883 {
884 /* Some error occurred, so drop the vector. */
885 int ix;
886 char *elt;
887 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
888 xfree (elt);
889 VEC_truncate (char_ptr, charsets, 0);
890 }
891 else
892 VEC_safe_push (char_ptr, charsets, NULL);
893 }
894
895 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
896 #endif /* PHONY_ICONV */
897
898 /* The "auto" target charset used by default_auto_charset. */
899 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
900
901 const char *
902 default_auto_charset (void)
903 {
904 return auto_target_charset_name;
905 }
906
907 const char *
908 default_auto_wide_charset (void)
909 {
910 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
911 }
912
913 void
914 _initialize_charset (void)
915 {
916 /* The first element is always "auto". */
917 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
918 find_charset_names ();
919
920 if (VEC_length (char_ptr, charsets) > 1)
921 charset_enum = (const char **) VEC_address (char_ptr, charsets);
922 else
923 charset_enum = default_charset_names;
924
925 #ifndef PHONY_ICONV
926 #ifdef HAVE_LANGINFO_CODESET
927 /* The result of nl_langinfo may be overwritten later. This may
928 leak a little memory, if the user later changes the host charset,
929 but that doesn't matter much. */
930 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
931 /* Solaris will return `646' here -- but the Solaris iconv then
932 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
933 which GNU libiconv doesn't like (infinite loop). */
934 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
935 auto_host_charset_name = "ASCII";
936 auto_target_charset_name = auto_host_charset_name;
937 #elif defined (USE_WIN32API)
938 {
939 static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
940
941 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
942 "CP%d", GetACP());
943 auto_host_charset_name = w32_host_default_charset;
944 auto_target_charset_name = auto_host_charset_name;
945 }
946 #endif
947 #endif
948
949 add_setshow_enum_cmd ("charset", class_support,
950 charset_enum, &host_charset_name, _("\
951 Set the host and target character sets."), _("\
952 Show the host and target character sets."), _("\
953 The `host character set' is the one used by the system GDB is running on.\n\
954 The `target character set' is the one used by the program being debugged.\n\
955 You may only use supersets of ASCII for your host character set; GDB does\n\
956 not support any others.\n\
957 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
958 /* Note that the sfunc below needs to set
959 target_charset_name, because the 'set
960 charset' command sets two variables. */
961 set_charset_sfunc,
962 show_charset,
963 &setlist, &showlist);
964
965 add_setshow_enum_cmd ("host-charset", class_support,
966 charset_enum, &host_charset_name, _("\
967 Set the host character set."), _("\
968 Show the host character set."), _("\
969 The `host character set' is the one used by the system GDB is running on.\n\
970 You may only use supersets of ASCII for your host character set; GDB does\n\
971 not support any others.\n\
972 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
973 set_host_charset_sfunc,
974 show_host_charset_name,
975 &setlist, &showlist);
976
977 add_setshow_enum_cmd ("target-charset", class_support,
978 charset_enum, &target_charset_name, _("\
979 Set the target character set."), _("\
980 Show the target character set."), _("\
981 The `target character set' is the one used by the program being debugged.\n\
982 GDB translates characters and strings between the host and target\n\
983 character sets as needed.\n\
984 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
985 set_target_charset_sfunc,
986 show_target_charset_name,
987 &setlist, &showlist);
988
989 add_setshow_enum_cmd ("target-wide-charset", class_support,
990 charset_enum, &target_wide_charset_name,
991 _("\
992 Set the target wide character set."), _("\
993 Show the target wide character set."), _("\
994 The `target wide character set' is the one used by the program being debugged.\n\
995 In particular it is the encoding used by `wchar_t'.\n\
996 GDB translates characters and strings between the host and target\n\
997 character sets as needed.\n\
998 To see a list of the character sets GDB supports, type\n\
999 `set target-wide-charset'<TAB>"),
1000 set_target_wide_charset_sfunc,
1001 show_target_wide_charset_name,
1002 &setlist, &showlist);
1003 }
This page took 0.066294 seconds and 4 git commands to generate.