X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=gdb%2Fcharset.c;h=a93fe99c41fa78a54550548e346966d73c474480;hb=d0801dd8f22a3e739c6a7d126d45829df981794d;hp=8ef75f8d87c3062b6f4420c18e05162bb7c03f88;hpb=40b5c9fb4b794ff4337707edbd74af035bdbb0c2;p=deliverable%2Fbinutils-gdb.git diff --git a/gdb/charset.c b/gdb/charset.c index 8ef75f8d87..a93fe99c41 100644 --- a/gdb/charset.c +++ b/gdb/charset.c @@ -1,6 +1,6 @@ /* Character set conversion support for GDB. - Copyright (C) 2001, 2003, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 2001-2019 Free Software Foundation, Inc. This file is part of GDB. @@ -20,17 +20,17 @@ #include "defs.h" #include "charset.h" #include "gdbcmd.h" -#include "gdb_assert.h" #include "gdb_obstack.h" -#include "gdb_wait.h" +#include "gdbsupport/gdb_wait.h" #include "charset-list.h" -#include "vec.h" -#include "environ.h" - -#include -#include "gdb_string.h" +#include "gdbsupport/environ.h" +#include "arch-utils.h" +#include "gdbsupport/gdb_vecs.h" #include +#ifdef USE_WIN32API +#include +#endif /* How GDB's character set support works @@ -76,83 +76,90 @@ arrange for there to be a single available character set. */ #undef GDB_DEFAULT_HOST_CHARSET -#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" -#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" -#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" +#ifdef USE_WIN32API +# define GDB_DEFAULT_HOST_CHARSET "CP1252" +#else +# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" +#endif +#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET +#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" #undef DEFAULT_CHARSET_NAMES #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , #undef iconv_t #define iconv_t int #undef iconv_open +#define iconv_open phony_iconv_open #undef iconv +#define iconv phony_iconv #undef iconv_close +#define iconv_close phony_iconv_close #undef ICONV_CONST #define ICONV_CONST const -/* Some systems don't have EILSEQ, so we define it here, but not as - EINVAL, because callers of `iconv' want to distinguish EINVAL and - EILSEQ. This is what iconv.h from libiconv does as well. Note - that wchar.h may also define EILSEQ, so this needs to be after we - include wchar.h, which happens in defs.h through gdb_wchar.h. */ -#ifndef EILSEQ -#define EILSEQ ENOENT -#endif +/* We allow conversions from UTF-32, wchar_t, and the host charset. + We allow conversions to wchar_t and the host charset. + Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE, + 0 otherwise. This is used as a flag in calls to iconv. */ -iconv_t -iconv_open (const char *to, const char *from) +static iconv_t +phony_iconv_open (const char *to, const char *from) { - /* We allow conversions from UTF-32BE, wchar_t, and the host charset. - We allow conversions to wchar_t and the host charset. */ - if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") - && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) - return -1; if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) return -1; - /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is - used as a flag in calls to iconv. */ - return !strcmp (from, "UTF-32BE"); + if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32")) + return 1; + + if (!strcmp (from, "UTF-32LE")) + return 2; + + if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) + return -1; + + return 0; } -int -iconv_close (iconv_t arg) +static int +phony_iconv_close (iconv_t arg) { return 0; } -size_t -iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, - char **outbuf, size_t *outbytesleft) +static size_t +phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) { if (utf_flag) { + enum bfd_endian endian + = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE; while (*inbytesleft >= 4) { - size_t j; - unsigned long c = 0; - - for (j = 0; j < 4; ++j) - { - c <<= 8; - c += (*inbuf)[j] & 0xff; - } + unsigned long c + = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian); if (c >= 256) { errno = EILSEQ; return -1; } + if (*outbytesleft < 1) + { + errno = E2BIG; + return -1; + } **outbuf = c & 0xff; ++*outbuf; --*outbytesleft; - ++*inbuf; + *inbuf += 4; *inbytesleft -= 4; } - if (*inbytesleft < 4) + if (*inbytesleft) { + /* Partial sequence on input. */ errno = EINVAL; return -1; } @@ -162,6 +169,7 @@ iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, /* In all other cases we simply copy input bytes to the output. */ size_t amt = *inbytesleft; + if (amt > *outbytesleft) amt = *outbytesleft; memcpy (*outbuf, *inbuf, amt); @@ -169,12 +177,11 @@ iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, *outbuf += amt; *inbytesleft -= amt; *outbytesleft -= amt; - } - - if (*inbytesleft) - { - errno = E2BIG; - return -1; + if (*inbytesleft) + { + errno = E2BIG; + return -1; + } } /* The number of non-reversible conversions -- but they were all @@ -182,8 +189,28 @@ iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, return 0; } -#endif +#else /* PHONY_ICONV */ +/* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it + to ENOENT, while gnulib defines it to a different value. Always + map ENOENT to gnulib's EILSEQ, leaving callers agnostic. */ + +static size_t +gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + size_t ret; + + ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft); + if (errno == ENOENT) + errno = EILSEQ; + return ret; +} + +#undef iconv +#define iconv gdb_iconv + +#endif /* PHONY_ICONV */ /* The global lists of character sets and translations. */ @@ -212,22 +239,36 @@ show_host_charset_name (struct ui_file *file, int from_tty, fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); } -static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET; +static const char *target_charset_name = "auto"; static void show_target_charset_name (struct ui_file *file, int from_tty, struct cmd_list_element *c, const char *value) { - fprintf_filtered (file, _("The target character set is \"%s\".\n"), - value); + if (!strcmp (value, "auto")) + fprintf_filtered (file, + _("The target character set is \"auto; " + "currently %s\".\n"), + gdbarch_auto_charset (get_current_arch ())); + else + fprintf_filtered (file, _("The target character set is \"%s\".\n"), + value); } -static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET; +static const char *target_wide_charset_name = "auto"; static void -show_target_wide_charset_name (struct ui_file *file, int from_tty, - struct cmd_list_element *c, const char *value) +show_target_wide_charset_name (struct ui_file *file, + int from_tty, + struct cmd_list_element *c, + const char *value) { - fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), - value); + if (!strcmp (value, "auto")) + fprintf_filtered (file, + _("The target wide character set is \"auto; " + "currently %s\".\n"), + gdbarch_auto_wide_charset (get_current_arch ())); + else + fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), + value); } static const char *default_charset_names[] = @@ -244,21 +285,38 @@ static const char **charset_enum; static const char *target_wide_charset_be_name; static const char *target_wide_charset_le_name; -/* A helper function for validate which sets the target wide big- and - little-endian character set names, if possible. */ +/* The architecture for which the BE- and LE-names are valid. */ +static struct gdbarch *be_le_arch; + +/* A helper function which sets the target wide big- and little-endian + character set names, if possible. */ static void -set_be_le_names (void) +set_be_le_names (struct gdbarch *gdbarch) { + if (be_le_arch == gdbarch) + return; + be_le_arch = gdbarch; + +#ifdef PHONY_ICONV + /* Match the wide charset names recognized by phony_iconv_open. */ + target_wide_charset_le_name = "UTF-32LE"; + target_wide_charset_be_name = "UTF-32BE"; +#else int i, len; + const char *target_wide; target_wide_charset_le_name = NULL; target_wide_charset_be_name = NULL; - len = strlen (target_wide_charset_name); + target_wide = target_wide_charset_name; + if (!strcmp (target_wide, "auto")) + target_wide = gdbarch_auto_wide_charset (gdbarch); + + len = strlen (target_wide); for (i = 0; charset_enum[i]; ++i) { - if (strncmp (target_wide_charset_name, charset_enum[i], len)) + if (strncmp (target_wide, charset_enum[i], len)) continue; if ((charset_enum[i][len] == 'B' || charset_enum[i][len] == 'L') @@ -271,74 +329,84 @@ set_be_le_names (void) target_wide_charset_le_name = charset_enum[i]; } } +# endif /* PHONY_ICONV */ } /* 'Set charset', 'set host-charset', 'set target-charset', 'set target-wide-charset', 'set charset' sfunc's. */ static void -validate (void) +validate (struct gdbarch *gdbarch) { iconv_t desc; const char *host_cset = host_charset (); + const char *target_cset = target_charset (gdbarch); + const char *target_wide_cset = target_wide_charset_name; + + if (!strcmp (target_wide_cset, "auto")) + target_wide_cset = gdbarch_auto_wide_charset (gdbarch); - desc = iconv_open (target_wide_charset_name, host_cset); + desc = iconv_open (target_wide_cset, host_cset); if (desc == (iconv_t) -1) - error ("Cannot convert between character sets `%s' and `%s'", - target_wide_charset_name, host_cset); + error (_("Cannot convert between character sets `%s' and `%s'"), + target_wide_cset, host_cset); iconv_close (desc); - desc = iconv_open (target_charset_name, host_cset); + desc = iconv_open (target_cset, host_cset); if (desc == (iconv_t) -1) - error ("Cannot convert between character sets `%s' and `%s'", - target_charset_name, host_cset); + error (_("Cannot convert between character sets `%s' and `%s'"), + target_cset, host_cset); iconv_close (desc); - set_be_le_names (); + /* Clear the cache. */ + be_le_arch = NULL; } /* This is the sfunc for the 'set charset' command. */ static void -set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c) +set_charset_sfunc (const char *charset, int from_tty, + struct cmd_list_element *c) { - /* CAREFUL: set the target charset here as well. */ + /* CAREFUL: set the target charset here as well. */ target_charset_name = host_charset_name; - validate (); + validate (get_current_arch ()); } /* 'set host-charset' command sfunc. We need a wrapper here because the function needs to have a specific signature. */ static void -set_host_charset_sfunc (char *charset, int from_tty, +set_host_charset_sfunc (const char *charset, int from_tty, struct cmd_list_element *c) { - validate (); + validate (get_current_arch ()); } /* Wrapper for the 'set target-charset' command. */ static void -set_target_charset_sfunc (char *charset, int from_tty, +set_target_charset_sfunc (const char *charset, int from_tty, struct cmd_list_element *c) { - validate (); + validate (get_current_arch ()); } /* Wrapper for the 'set target-wide-charset' command. */ static void -set_target_wide_charset_sfunc (char *charset, int from_tty, +set_target_wide_charset_sfunc (const char *charset, int from_tty, struct cmd_list_element *c) { - validate (); + validate (get_current_arch ()); } /* sfunc for the 'show charset' command. */ static void -show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c, +show_charset (struct ui_file *file, int from_tty, + struct cmd_list_element *c, const char *name) { show_host_charset_name (file, from_tty, c, host_charset_name); show_target_charset_name (file, from_tty, c, target_charset_name); - show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name); + show_target_wide_charset_name (file, from_tty, c, + target_wide_charset_name); } @@ -353,14 +421,19 @@ host_charset (void) } const char * -target_charset (void) +target_charset (struct gdbarch *gdbarch) { + if (!strcmp (target_charset_name, "auto")) + return gdbarch_auto_charset (gdbarch); return target_charset_name; } const char * -target_wide_charset (enum bfd_endian byte_order) +target_wide_charset (struct gdbarch *gdbarch) { + enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); + + set_be_le_names (gdbarch); if (byte_order == BFD_ENDIAN_BIG) { if (target_wide_charset_be_name) @@ -372,6 +445,9 @@ target_wide_charset (enum bfd_endian byte_order) return target_wide_charset_le_name; } + if (!strcmp (target_wide_charset_name, "auto")) + return gdbarch_auto_wide_charset (gdbarch); + return target_wide_charset_name; } @@ -404,14 +480,32 @@ host_hex_value (char c) /* Public character management functions. */ -/* A cleanup function which is run to close an iconv descriptor. */ - -static void -cleanup_iconv (void *p) +class iconv_wrapper { - iconv_t *descp = p; - iconv_close (*descp); -} +public: + + iconv_wrapper (const char *to, const char *from) + { + m_desc = iconv_open (to, from); + if (m_desc == (iconv_t) -1) + perror_with_name (_("Converting character sets")); + } + + ~iconv_wrapper () + { + iconv_close (m_desc); + } + + size_t convert (ICONV_CONST char **inp, size_t *inleft, char **outp, + size_t *outleft) + { + return iconv (m_desc, inp, inleft, outp, outleft); + } + +private: + + iconv_t m_desc; +}; void convert_between_encodings (const char *from, const char *to, @@ -419,10 +513,8 @@ convert_between_encodings (const char *from, const char *to, int width, struct obstack *output, enum transliterations translit) { - iconv_t desc; - struct cleanup *cleanups; size_t inleft; - char *inp; + ICONV_CONST char *inp; unsigned int space_request; /* Often, the host and target charsets will be the same. */ @@ -432,13 +524,10 @@ convert_between_encodings (const char *from, const char *to, return; } - desc = iconv_open (to, from); - if (desc == (iconv_t) -1) - perror_with_name ("Converting character sets"); - cleanups = make_cleanup (cleanup_iconv, &desc); + iconv_wrapper desc (to, from); inleft = num_bytes; - inp = (char *) bytes; + inp = (ICONV_CONST char *) bytes; space_request = num_bytes; @@ -451,14 +540,14 @@ convert_between_encodings (const char *from, const char *to, old_size = obstack_object_size (output); obstack_blank (output, space_request); - outp = obstack_base (output) + old_size; + outp = (char *) obstack_base (output) + old_size; outleft = space_request; - r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft); + r = desc.convert (&inp, &inleft, &outp, &outleft); /* Now make sure that the object on the obstack only includes bytes we have converted. */ - obstack_blank (output, - (int) outleft); + obstack_blank_fast (output, -(ssize_t) outleft); if (r == (size_t) -1) { @@ -470,8 +559,8 @@ convert_between_encodings (const char *from, const char *to, /* Invalid input sequence. */ if (translit == translit_none) - error (_("Could not convert character to `%s' character set"), - to); + error (_("Could not convert character " + "to `%s' character set"), to); /* We emit escape sequence for the bytes, skip them, and try again. */ @@ -479,7 +568,7 @@ convert_between_encodings (const char *from, const char *to, { char octal[5]; - sprintf (octal, "\\%.3o", *inp & 0xff); + xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff); obstack_grow_str (output, octal); ++inp; @@ -501,81 +590,39 @@ convert_between_encodings (const char *from, const char *to, break; default: - perror_with_name ("Internal error while converting character sets"); + perror_with_name (_("Internal error while " + "converting character sets")); } } } - - do_cleanups (cleanups); } -/* An iterator that returns host wchar_t's from a target string. */ -struct wchar_iterator -{ - /* The underlying iconv descriptor. */ - iconv_t desc; - - /* The input string. This is updated as convert characters. */ - char *input; - /* The number of bytes remaining in the input. */ - size_t bytes; - - /* The width of an input character. */ - size_t width; - - /* The output buffer and its size. */ - gdb_wchar_t *out; - size_t out_size; -}; - /* Create a new iterator. */ -struct wchar_iterator * -make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset, - size_t width) +wchar_iterator::wchar_iterator (const gdb_byte *input, size_t bytes, + const char *charset, size_t width) +: m_input (input), + m_bytes (bytes), + m_width (width), + m_out (1) { - struct wchar_iterator *result; - iconv_t desc; - - desc = iconv_open (INTERMEDIATE_ENCODING, charset); - if (desc == (iconv_t) -1) - perror_with_name ("Converting character sets"); - - result = XNEW (struct wchar_iterator); - result->desc = desc; - result->input = (char *) input; - result->bytes = bytes; - result->width = width; - - result->out = XNEW (gdb_wchar_t); - result->out_size = 1; - - return result; + m_desc = iconv_open (INTERMEDIATE_ENCODING, charset); + if (m_desc == (iconv_t) -1) + perror_with_name (_("Converting character sets")); } -static void -do_cleanup_iterator (void *p) +wchar_iterator::~wchar_iterator () { - struct wchar_iterator *iter = p; - - iconv_close (iter->desc); - xfree (iter->out); - xfree (iter); -} - -struct cleanup * -make_cleanup_wchar_iterator (struct wchar_iterator *iter) -{ - return make_cleanup (do_cleanup_iterator, iter); + if (m_desc != (iconv_t) -1) + iconv_close (m_desc); } int -wchar_iterate (struct wchar_iterator *iter, - enum wchar_iterate_result *out_result, - gdb_wchar_t **out_chars, - const gdb_byte **ptr, - size_t *len) +wchar_iterator::iterate (enum wchar_iterate_result *out_result, + gdb_wchar_t **out_chars, + const gdb_byte **ptr, + size_t *len) { size_t out_request; @@ -585,30 +632,35 @@ wchar_iterate (struct wchar_iterator *iter, invalid input sequence -- but we want to reliably report this to our caller so it can emit an escape sequence. */ out_request = 1; - while (iter->bytes > 0) + while (m_bytes > 0) { - char *outptr = (char *) &iter->out[0]; - char *orig_inptr = iter->input; - size_t orig_in = iter->bytes; + ICONV_CONST char *inptr = (ICONV_CONST char *) m_input; + char *outptr = (char *) m_out.data (); + const gdb_byte *orig_inptr = m_input; + size_t orig_in = m_bytes; size_t out_avail = out_request * sizeof (gdb_wchar_t); size_t num; - gdb_wchar_t result; + size_t r = iconv (m_desc, &inptr, &m_bytes, &outptr, &out_avail); + + m_input = (gdb_byte *) inptr; - size_t r = iconv (iter->desc, - (ICONV_CONST char **) &iter->input, &iter->bytes, - &outptr, &out_avail); if (r == (size_t) -1) { switch (errno) { case EILSEQ: - /* Invalid input sequence. Skip it, and let the caller - know about it. */ + /* Invalid input sequence. We still might have + converted a character; if so, return it. */ + if (out_avail < out_request * sizeof (gdb_wchar_t)) + break; + + /* Otherwise skip the first invalid character, and let + the caller know about it. */ *out_result = wchar_iterate_invalid; - *ptr = iter->input; - *len = iter->width; - iter->input += iter->width; - iter->bytes -= iter->width; + *ptr = m_input; + *len = m_width; + m_input += m_width; + m_bytes -= m_width; return 0; case E2BIG: @@ -619,34 +671,31 @@ wchar_iterate (struct wchar_iterator *iter, break; ++out_request; - if (out_request > iter->out_size) - { - iter->out_size = out_request; - iter->out = xrealloc (iter->out, - out_request * sizeof (gdb_wchar_t)); - } + if (out_request > m_out.size ()) + m_out.resize (out_request); continue; case EINVAL: /* Incomplete input sequence. Let the caller know, and arrange for future calls to see EOF. */ *out_result = wchar_iterate_incomplete; - *ptr = iter->input; - *len = iter->bytes; - iter->bytes = 0; + *ptr = m_input; + *len = m_bytes; + m_bytes = 0; return 0; default: - perror_with_name ("Internal error while converting character sets"); + perror_with_name (_("Internal error while " + "converting character sets")); } } /* We converted something. */ num = out_request - out_avail / sizeof (gdb_wchar_t); *out_result = wchar_iterate_ok; - *out_chars = iter->out; + *out_chars = m_out.data (); *ptr = orig_inptr; - *len = orig_in - iter->bytes; + *len = orig_in - m_bytes; return num; } @@ -655,23 +704,33 @@ wchar_iterate (struct wchar_iterator *iter, return -1; } - -/* The charset.c module initialization function. */ +struct charset_vector +{ + ~charset_vector () + { + clear (); + } -extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ + void clear () + { + for (char *c : charsets) + xfree (c); -typedef char *char_ptr; -DEF_VEC_P (char_ptr); + charsets.clear (); + } -static VEC (char_ptr) *charsets; + std::vector charsets; +}; + +static charset_vector charsets; #ifdef PHONY_ICONV static void find_charset_names (void) { - VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); - VEC_safe_push (char_ptr, charsets, NULL); + charsets.charsets.push_back (xstrdup (GDB_DEFAULT_HOST_CHARSET)); + charsets.charsets.push_back (NULL); } #else /* PHONY_ICONV */ @@ -692,7 +751,7 @@ add_one (unsigned int count, const char *const *names, void *data) unsigned int i; for (i = 0; i < count; ++i) - VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); + charsets.charsets.push_back (xstrdup (names[i])); return 0; } @@ -701,7 +760,8 @@ static void find_charset_names (void) { iconvlist (add_one, NULL); - VEC_safe_push (char_ptr, charsets, NULL); + + charsets.charsets.push_back (NULL); } #else @@ -739,27 +799,41 @@ static void find_charset_names (void) { struct pex_obj *child; - char *args[3]; + const char *args[3]; int err, status; int fail = 1; - struct gdb_environ *iconv_env; - - /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not - a tty. We need to recognize it and ignore it. This text is subject - to translation, so force LANGUAGE=C. */ - iconv_env = make_environ (); - init_environ (iconv_env); - set_in_environ (iconv_env, "LANGUAGE", "C"); - set_in_environ (iconv_env, "LC_ALL", "C"); - - child = pex_init (0, "iconv", NULL); - - args[0] = "iconv"; + int flags; + gdb_environ iconv_env = gdb_environ::from_host_environ (); + char *iconv_program; + + /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is + not a tty. We need to recognize it and ignore it. This text is + subject to translation, so force LANGUAGE=C. */ + iconv_env.set ("LANGUAGE", "C"); + iconv_env.set ("LC_ALL", "C"); + + child = pex_init (PEX_USE_PIPES, "iconv", NULL); + +#ifdef ICONV_BIN + { + std::string iconv_dir = relocate_gdb_directory (ICONV_BIN, + ICONV_BIN_RELOCATABLE); + iconv_program = concat (iconv_dir.c_str(), SLASH_STRING, "iconv", NULL); + } +#else + iconv_program = xstrdup ("iconv"); +#endif + args[0] = iconv_program; args[1] = "-l"; args[2] = NULL; + flags = PEX_STDERR_TO_STDOUT; +#ifndef ICONV_BIN + flags |= PEX_SEARCH; +#endif /* Note that we simply ignore errors here. */ - if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT, - "iconv", args, environ_vector (iconv_env), + if (!pex_run_in_environment (child, flags, + args[0], const_cast (args), + iconv_env.envp (), NULL, NULL, &err)) { FILE *in = pex_read_output (child, 0); @@ -768,12 +842,12 @@ find_charset_names (void) parse the glibc and libiconv formats; feel free to add others as needed. */ - while (!feof (in)) + while (in != NULL && !feof (in)) { /* The size of buf is chosen arbitrarily. */ char buf[1024]; char *start, *r; - int len, keep_going; + int len; r = fgets (buf, sizeof (buf), in); if (!r) @@ -795,8 +869,8 @@ find_charset_names (void) buf[len] = '\0'; /* libiconv will print multiple entries per line, separated - by spaces. Older iconvs will print multiple entries per line, - indented by two spaces, and separated by ", " + by spaces. Older iconvs will print multiple entries per + line, indented by two spaces, and separated by ", " (i.e. the human readable form). */ start = buf; while (1) @@ -816,7 +890,7 @@ find_charset_names (void) break; keep_going = *p; *p = '\0'; - VEC_safe_push (char_ptr, charsets, xstrdup (start)); + charsets.charsets.push_back (xstrdup (start)); if (!keep_going) break; /* Skip any extra spaces. */ @@ -831,56 +905,134 @@ find_charset_names (void) } + xfree (iconv_program); pex_free (child); - free_environ (iconv_env); if (fail) { /* Some error occurred, so drop the vector. */ - int ix; - char *elt; - for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix) - xfree (elt); - VEC_truncate (char_ptr, charsets, 0); + charsets.clear (); } else - VEC_safe_push (char_ptr, charsets, NULL); + charsets.charsets.push_back (NULL); } #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ #endif /* PHONY_ICONV */ +/* The "auto" target charset used by default_auto_charset. */ +static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; + +const char * +default_auto_charset (void) +{ + return auto_target_charset_name; +} + +const char * +default_auto_wide_charset (void) +{ + return GDB_DEFAULT_TARGET_WIDE_CHARSET; +} + + +#ifdef USE_INTERMEDIATE_ENCODING_FUNCTION +/* Macro used for UTF or UCS endianness suffix. */ +#if WORDS_BIGENDIAN +#define ENDIAN_SUFFIX "BE" +#else +#define ENDIAN_SUFFIX "LE" +#endif + +/* GDB cannot handle strings correctly if this size is different. */ + +gdb_static_assert (sizeof (gdb_wchar_t) == 2 || sizeof (gdb_wchar_t) == 4); + +/* intermediate_encoding returns the charset used internally by + GDB to convert between target and host encodings. As the test above + compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. + UTF-16/32 is tested first, UCS-2/4 is tested as a second option, + otherwise an error is generated. */ + +const char * +intermediate_encoding (void) +{ + iconv_t desc; + static const char *stored_result = NULL; + char *result; + + if (stored_result) + return stored_result; + result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), + ENDIAN_SUFFIX); + /* Check that the name is supported by iconv_open. */ + desc = iconv_open (result, host_charset ()); + if (desc != (iconv_t) -1) + { + iconv_close (desc); + stored_result = result; + return result; + } + /* Not valid, free the allocated memory. */ + xfree (result); + /* Second try, with UCS-2 type. */ + result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), + ENDIAN_SUFFIX); + /* Check that the name is supported by iconv_open. */ + desc = iconv_open (result, host_charset ()); + if (desc != (iconv_t) -1) + { + iconv_close (desc); + stored_result = result; + return result; + } + /* Not valid, free the allocated memory. */ + xfree (result); + /* No valid charset found, generate error here. */ + error (_("Unable to find a vaild charset for string conversions")); +} + +#endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ + void _initialize_charset (void) { - struct cmd_list_element *new_cmd; - - /* The first element is always "auto"; then we skip it for the - commands where it is not allowed. */ - VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); + /* The first element is always "auto". */ + charsets.charsets.push_back (xstrdup ("auto")); find_charset_names (); - if (VEC_length (char_ptr, charsets) > 1) - charset_enum = (const char **) VEC_address (char_ptr, charsets); + if (charsets.charsets.size () > 1) + charset_enum = (const char **) charsets.charsets.data (); else charset_enum = default_charset_names; #ifndef PHONY_ICONV #ifdef HAVE_LANGINFO_CODESET - auto_host_charset_name = nl_langinfo (CODESET); - /* Solaris will return `646' here -- but the Solaris iconv then - does not accept this. Darwin (and maybe FreeBSD) may return "" here, + /* The result of nl_langinfo may be overwritten later. This may + leak a little memory, if the user later changes the host charset, + but that doesn't matter much. */ + auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); + /* Solaris will return `646' here -- but the Solaris iconv then does + not accept this. Darwin (and maybe FreeBSD) may return "" here, which GNU libiconv doesn't like (infinite loop). */ if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) auto_host_charset_name = "ASCII"; - target_charset_name = auto_host_charset_name; - - set_be_le_names (); + auto_target_charset_name = auto_host_charset_name; +#elif defined (USE_WIN32API) + { + /* "CP" + x<=5 digits + paranoia. */ + static char w32_host_default_charset[16]; + + snprintf (w32_host_default_charset, sizeof w32_host_default_charset, + "CP%d", GetACP()); + auto_host_charset_name = w32_host_default_charset; + auto_target_charset_name = auto_host_charset_name; + } #endif #endif add_setshow_enum_cmd ("charset", class_support, - &charset_enum[1], &host_charset_name, _("\ + charset_enum, &host_charset_name, _("\ Set the host and target character sets."), _("\ Show the host and target character sets."), _("\ The `host character set' is the one used by the system GDB is running on.\n\ @@ -908,7 +1060,7 @@ To see a list of the character sets GDB supports, type `set host-charset '. &setlist, &showlist); add_setshow_enum_cmd ("target-charset", class_support, - &charset_enum[1], &target_charset_name, _("\ + charset_enum, &target_charset_name, _("\ Set the target character set."), _("\ Show the target character set."), _("\ The `target character set' is the one used by the program being debugged.\n\ @@ -920,12 +1072,12 @@ To see a list of the character sets GDB supports, type `set target-charset' &setlist, &showlist); add_setshow_enum_cmd ("target-wide-charset", class_support, - &charset_enum[1], &target_wide_charset_name, + charset_enum, &target_wide_charset_name, _("\ Set the target wide character set."), _("\ Show the target wide character set."), _("\ -The `target wide character set' is the one used by the program being debugged.\n\ -In particular it is the encoding used by `wchar_t'.\n\ +The `target wide character set' is the one used by the program being debugged.\ +\nIn particular it is the encoding used by `wchar_t'.\n\ GDB translates characters and strings between the host and target\n\ character sets as needed.\n\ To see a list of the character sets GDB supports, type\n\