/* Character set conversion support for GDB.
- Copyright (C) 2001, 2003, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2001-2020 Free Software Foundation, Inc.
This file is part of GDB.
#include "defs.h"
#include "charset.h"
#include "gdbcmd.h"
-#include "gdb_assert.h"
#include "gdb_obstack.h"
+#include "gdbsupport/gdb_wait.h"
#include "charset-list.h"
-#include "vec.h"
-
-#include <stddef.h>
-#include "gdb_string.h"
+#include "gdbsupport/environ.h"
+#include "arch-utils.h"
+#include "gdbsupport/gdb_vecs.h"
#include <ctype.h>
+#ifdef USE_WIN32API
+#include <windows.h>
+#endif
\f
/* How GDB's character set support works
arrange for there to be a single available character set. */
#undef GDB_DEFAULT_HOST_CHARSET
-#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
+#ifdef USE_WIN32API
+# define GDB_DEFAULT_HOST_CHARSET "CP1252"
+#else
+# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
+#endif
+#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET
+#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
#undef iconv_t
#define iconv_t int
#undef iconv_open
+#define iconv_open phony_iconv_open
#undef iconv
+#define iconv phony_iconv
#undef iconv_close
+#define iconv_close phony_iconv_close
#undef ICONV_CONST
#define ICONV_CONST const
-iconv_t
-iconv_open (const char *to, const char *from)
+/* Open a conversion descriptor for the phony iconv implementation.
+ We allow conversions from UTF-32, UTF-32BE, UTF-32LE, wchar_t, and
+ the host charset. We allow conversions to wchar_t and the host
+ charset. Return -1 if the requested conversion is not supported,
+ 1 if we are converting from UTF-32BE (or plain UTF-32), 2 if from
+ UTF-32LE, and 0 otherwise. The non-negative result is used as a
+ flag in calls to iconv. */
+
+static iconv_t
+phony_iconv_open (const char *to, const char *from)
{
- /* We allow conversions from UCS-4BE, wchar_t, and the host charset.
- We allow conversions to wchar_t and the host charset. */
- if (strcmp (from, "UCS-4BE") && strcmp (from, "wchar_t")
- && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
- return -1;
if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
return -1;
- /* Return 1 if we are converting from UCS-4BE, 0 otherwise. This is
- used as a flag in calls to iconv. */
- return !strcmp (from, "UCS-4BE");
+ if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32"))
+ return 1;
+
+ if (!strcmp (from, "UTF-32LE"))
+ return 2;
+
+ if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
+ return -1;
+
+ return 0;
}
-int
-iconv_close (iconv_t arg)
+/* Close a descriptor returned by phony_iconv_open. The descriptor
+ is only an integer flag, so ARG is ignored and the result is
+ always 0. */
+static int
+phony_iconv_close (iconv_t arg)
{
return 0;
}
-size_t
-iconv (iconv_t ucs_flag, const char **inbuf, size_t *inbytesleft,
- char **outbuf, size_t *outbytesleft)
+static size_t
+phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
{
- if (ucs_flag)
+ if (utf_flag)
{
+ enum bfd_endian endian
+ = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
while (*inbytesleft >= 4)
{
- size_t j;
- unsigned long c = 0;
-
- for (j = 0; j < 4; ++j)
- {
- c <<= 8;
- c += (*inbuf)[j] & 0xff;
- }
+ unsigned long c
+ = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian);
if (c >= 256)
{
errno = EILSEQ;
return -1;
}
+ if (*outbytesleft < 1)
+ {
+ errno = E2BIG;
+ return -1;
+ }
**outbuf = c & 0xff;
++*outbuf;
--*outbytesleft;
- ++*inbuf;
+ *inbuf += 4;
*inbytesleft -= 4;
}
- if (*inbytesleft < 4)
+ if (*inbytesleft)
{
+ /* Partial sequence on input. */
errno = EINVAL;
return -1;
}
/* In all other cases we simply copy input bytes to the
output. */
size_t amt = *inbytesleft;
+
if (amt > *outbytesleft)
amt = *outbytesleft;
memcpy (*outbuf, *inbuf, amt);
*outbuf += amt;
*inbytesleft -= amt;
*outbytesleft -= amt;
- }
-
- if (*inbytesleft)
- {
- errno = E2BIG;
- return -1;
+ if (*inbytesleft)
+ {
+ errno = E2BIG;
+ return -1;
+ }
}
/* The number of non-reversible conversions -- but they were all
return 0;
}
-#endif
+#else /* PHONY_ICONV */
+
+/* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it
+ to ENOENT, while gnulib defines it to a different value. Always
+ map ENOENT to gnulib's EILSEQ, leaving callers agnostic.
+
+ This is a thin wrapper around iconv: the arguments are passed
+ through unchanged and iconv's return value is returned as is.
+ errno is only examined when the conversion actually failed, so a
+ stale ENOENT left behind by some earlier, unrelated call is never
+ rewritten after a successful conversion. */
+static size_t
+gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft,
+           char **outbuf, size_t *outbytesleft)
+{
+  size_t ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft);
+
+  /* errno is meaningful only after a failure return.  */
+  if (ret == (size_t) -1 && errno == ENOENT)
+    errno = EILSEQ;
+  return ret;
+}
+
+#undef iconv
+#define iconv gdb_iconv
+
+#endif /* PHONY_ICONV */
\f
/* The global lists of character sets and translations. */
#endif
#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
-#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UCS-4"
+#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
}
-static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
+static const char *target_charset_name = "auto";
+/* Print the target character set setting VALUE to FILE. When VALUE
+ is "auto", also print the charset that gdbarch_auto_charset
+ reports for the current architecture. */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
struct cmd_list_element *c, const char *value)
{
- fprintf_filtered (file, _("The target character set is \"%s\".\n"),
- value);
+ if (!strcmp (value, "auto"))
+ fprintf_filtered (file,
+ _("The target character set is \"auto; "
+ "currently %s\".\n"),
+ gdbarch_auto_charset (get_current_arch ()));
+ else
+ fprintf_filtered (file, _("The target character set is \"%s\".\n"),
+ value);
}
-static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;
+static const char *target_wide_charset_name = "auto";
+/* Print the target wide character set setting VALUE to FILE. When
+ VALUE is "auto", also print the charset that
+ gdbarch_auto_wide_charset reports for the current architecture. */
static void
-show_target_wide_charset_name (struct ui_file *file, int from_tty,
- struct cmd_list_element *c, const char *value)
+show_target_wide_charset_name (struct ui_file *file,
+ int from_tty,
+ struct cmd_list_element *c,
+ const char *value)
{
- fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
- value);
+ if (!strcmp (value, "auto"))
+ fprintf_filtered (file,
+ _("The target wide character set is \"auto; "
+ "currently %s\".\n"),
+ gdbarch_auto_wide_charset (get_current_arch ()));
+ else
+ fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
+ value);
}
static const char *default_charset_names[] =
static const char *target_wide_charset_be_name;
static const char *target_wide_charset_le_name;
-/* A helper function for validate which sets the target wide big- and
- little-endian character set names, if possible. */
+/* The architecture for which the BE- and LE-names are valid. */
+static struct gdbarch *be_le_arch;
+
+/* A helper function which sets the target wide big- and little-endian
+ character set names, if possible. */
static void
-set_be_le_names (void)
+set_be_le_names (struct gdbarch *gdbarch)
{
+ if (be_le_arch == gdbarch)
+ return;
+ be_le_arch = gdbarch;
+
+#ifdef PHONY_ICONV
+ /* Match the wide charset names recognized by phony_iconv_open. */
+ target_wide_charset_le_name = "UTF-32LE";
+ target_wide_charset_be_name = "UTF-32BE";
+#else
int i, len;
+ const char *target_wide;
target_wide_charset_le_name = NULL;
target_wide_charset_be_name = NULL;
- len = strlen (target_wide_charset_name);
+ target_wide = target_wide_charset_name;
+ if (!strcmp (target_wide, "auto"))
+ target_wide = gdbarch_auto_wide_charset (gdbarch);
+
+ len = strlen (target_wide);
for (i = 0; charset_enum[i]; ++i)
{
- if (strncmp (target_wide_charset_name, charset_enum[i], len))
+ if (strncmp (target_wide, charset_enum[i], len))
continue;
if ((charset_enum[i][len] == 'B'
|| charset_enum[i][len] == 'L')
target_wide_charset_le_name = charset_enum[i];
}
}
+# endif /* PHONY_ICONV */
}
/* 'Set charset', 'set host-charset', 'set target-charset', 'set
target-wide-charset', 'set charset' sfunc's. */
+/* Check that the current charset settings are usable for GDBARCH:
+ iconv_open must accept a conversion from the host charset to the
+ target wide charset and to the target charset (with "auto"
+ resolved through GDBARCH). Calls error if either conversion
+ cannot be opened. On success the cached big-/little-endian wide
+ charset names are invalidated by resetting be_le_arch. */
static void
-validate (void)
+validate (struct gdbarch *gdbarch)
{
iconv_t desc;
const char *host_cset = host_charset ();
+ const char *target_cset = target_charset (gdbarch);
+ const char *target_wide_cset = target_wide_charset_name;
+
+ if (!strcmp (target_wide_cset, "auto"))
+ target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
- desc = iconv_open (target_wide_charset_name, host_cset);
+ desc = iconv_open (target_wide_cset, host_cset);
if (desc == (iconv_t) -1)
- error ("Cannot convert between character sets `%s' and `%s'",
- target_wide_charset_name, host_cset);
+ error (_("Cannot convert between character sets `%s' and `%s'"),
+ target_wide_cset, host_cset);
iconv_close (desc);
- desc = iconv_open (target_charset_name, host_cset);
+ desc = iconv_open (target_cset, host_cset);
if (desc == (iconv_t) -1)
- error ("Cannot convert between character sets `%s' and `%s'",
- target_charset_name, host_cset);
+ error (_("Cannot convert between character sets `%s' and `%s'"),
+ target_cset, host_cset);
iconv_close (desc);
- set_be_le_names ();
+ /* Clear the cache, so that the big- and little-endian names are
+ recomputed the next time set_be_le_names is called. */
+ be_le_arch = NULL;
}
/* This is the sfunc for the 'set charset' command. */
static void
-set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
+set_charset_sfunc (const char *charset, int from_tty,
+ struct cmd_list_element *c)
{
- /* CAREFUL: set the target charset here as well. */
+ /* CAREFUL: set the target charset here as well, so that it names
+ the same charset as the host setting, then re-validate the
+ settings against the current architecture. */
target_charset_name = host_charset_name;
- validate ();
+ validate (get_current_arch ());
}
/* 'set host-charset' command sfunc. We need a wrapper here because
the function needs to have a specific signature. */
static void
-set_host_charset_sfunc (char *charset, int from_tty,
+set_host_charset_sfunc (const char *charset, int from_tty,
struct cmd_list_element *c)
{
- validate ();
+ /* The arguments are unused; just re-validate the charset settings
+ against the current architecture. */
+ validate (get_current_arch ());
}
/* Wrapper for the 'set target-charset' command. */
static void
-set_target_charset_sfunc (char *charset, int from_tty,
+set_target_charset_sfunc (const char *charset, int from_tty,
struct cmd_list_element *c)
{
- validate ();
+ /* The arguments are unused; just re-validate the charset settings
+ against the current architecture. */
+ validate (get_current_arch ());
}
/* Wrapper for the 'set target-wide-charset' command. */
static void
-set_target_wide_charset_sfunc (char *charset, int from_tty,
+set_target_wide_charset_sfunc (const char *charset, int from_tty,
struct cmd_list_element *c)
{
- validate ();
+ /* The arguments are unused; just re-validate the charset settings
+ against the current architecture. */
+ validate (get_current_arch ());
}
/* sfunc for the 'show charset' command. */
static void
-show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
+show_charset (struct ui_file *file, int from_tty,
+ struct cmd_list_element *c,
const char *name)
{
+ /* NAME is ignored; report the host, target, and target wide
+ settings in turn from their own variables. */
show_host_charset_name (file, from_tty, c, host_charset_name);
show_target_charset_name (file, from_tty, c, target_charset_name);
- show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
+ show_target_wide_charset_name (file, from_tty, c,
+ target_wide_charset_name);
}
\f
}
+/* Return the name of the target charset. If the setting is "auto",
+ the name is obtained from gdbarch_auto_charset for GDBARCH;
+ otherwise the configured name is returned as is. */
const char *
-target_charset (void)
+target_charset (struct gdbarch *gdbarch)
{
+ if (!strcmp (target_charset_name, "auto"))
+ return gdbarch_auto_charset (gdbarch);
return target_charset_name;
}
const char *
-target_wide_charset (void)
+target_wide_charset (struct gdbarch *gdbarch)
{
- if (gdbarch_byte_order (current_gdbarch) == BFD_ENDIAN_BIG)
+ enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
+
+ set_be_le_names (gdbarch);
+ if (byte_order == BFD_ENDIAN_BIG)
{
if (target_wide_charset_be_name)
return target_wide_charset_be_name;
return target_wide_charset_le_name;
}
+ if (!strcmp (target_wide_charset_name, "auto"))
+ return gdbarch_auto_wide_charset (gdbarch);
+
return target_wide_charset_name;
}
\f
/* Public character management functions. */
-/* A cleanup function which is run to close an iconv descriptor. */
-
-static void
-cleanup_iconv (void *p)
+/* An owning wrapper around an iconv conversion descriptor. The
+ constructor opens the descriptor and the destructor closes it, so
+ the descriptor is released on every exit path of the enclosing
+ scope. Copying is disabled: a copy would hold the same descriptor
+ and close it a second time. */
+class iconv_wrapper
{
- iconv_t *descp = p;
- iconv_close (*descp);
-}
+public:
+
+ /* Open a descriptor converting from charset FROM to charset TO.
+ Calls perror_with_name if iconv_open fails. */
+ iconv_wrapper (const char *to, const char *from)
+ {
+ m_desc = iconv_open (to, from);
+ if (m_desc == (iconv_t) -1)
+ perror_with_name (_("Converting character sets"));
+ }
+
+ /* The descriptor has exactly one owner. */
+ iconv_wrapper (const iconv_wrapper &) = delete;
+ iconv_wrapper &operator= (const iconv_wrapper &) = delete;
+
+ ~iconv_wrapper ()
+ {
+ iconv_close (m_desc);
+ }
+
+ /* Run one iconv call on the wrapped descriptor. The arguments and
+ the return value are those of iconv itself. */
+ size_t convert (ICONV_CONST char **inp, size_t *inleft, char **outp,
+ size_t *outleft)
+ {
+ return iconv (m_desc, inp, inleft, outp, outleft);
+ }
+
+private:
+
+ /* The conversion descriptor owned by this object. */
+ iconv_t m_desc;
+};
void
convert_between_encodings (const char *from, const char *to,
int width, struct obstack *output,
enum transliterations translit)
{
- iconv_t desc;
- struct cleanup *cleanups;
size_t inleft;
- char *inp;
+ ICONV_CONST char *inp;
unsigned int space_request;
/* Often, the host and target charsets will be the same. */
return;
}
- desc = iconv_open (to, from);
- if (desc == (iconv_t) -1)
- perror_with_name ("Converting character sets");
- cleanups = make_cleanup (cleanup_iconv, &desc);
+ iconv_wrapper desc (to, from);
inleft = num_bytes;
- inp = (char *) bytes;
+ inp = (ICONV_CONST char *) bytes;
space_request = num_bytes;
old_size = obstack_object_size (output);
obstack_blank (output, space_request);
- outp = obstack_base (output) + old_size;
+ outp = (char *) obstack_base (output) + old_size;
outleft = space_request;
- r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
+ r = desc.convert (&inp, &inleft, &outp, &outleft);
/* Now make sure that the object on the obstack only includes
bytes we have converted. */
- obstack_blank (output, - (int) outleft);
+ obstack_blank_fast (output, -(ssize_t) outleft);
if (r == (size_t) -1)
{
/* Invalid input sequence. */
if (translit == translit_none)
- error (_("Could not convert character to `%s' character set"),
- to);
+ error (_("Could not convert character "
+ "to `%s' character set"), to);
/* We emit escape sequence for the bytes, skip them,
and try again. */
{
char octal[5];
- sprintf (octal, "\\%.3o", *inp & 0xff);
+ xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
obstack_grow_str (output, octal);
++inp;
break;
default:
- perror_with_name ("Internal error while converting character sets");
+ perror_with_name (_("Internal error while "
+ "converting character sets"));
}
}
}
-
- do_cleanups (cleanups);
}
\f
-/* An iterator that returns host wchar_t's from a target string. */
-struct wchar_iterator
-{
- /* The underlying iconv descriptor. */
- iconv_t desc;
-
- /* The input string. This is updated as convert characters. */
- char *input;
- /* The number of bytes remaining in the input. */
- size_t bytes;
-
- /* The width of an input character. */
- size_t width;
-
- /* The output buffer and its size. */
- gdb_wchar_t *out;
- size_t out_size;
-};
-
/* Create a new iterator. */
-struct wchar_iterator *
-make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
- size_t width)
+wchar_iterator::wchar_iterator (const gdb_byte *input, size_t bytes,
+ const char *charset, size_t width)
+: m_input (input),
+ m_bytes (bytes),
+ m_width (width),
+ m_out (1)
{
- struct wchar_iterator *result;
- iconv_t desc;
-
- desc = iconv_open ("wchar_t", charset);
- if (desc == (iconv_t) -1)
- perror_with_name ("Converting character sets");
-
- result = XNEW (struct wchar_iterator);
- result->desc = desc;
- result->input = (char *) input;
- result->bytes = bytes;
- result->width = width;
-
- result->out = XNEW (gdb_wchar_t);
- result->out_size = 1;
-
- return result;
-}
-
-static void
-do_cleanup_iterator (void *p)
-{
- struct wchar_iterator *iter = p;
-
- iconv_close (iter->desc);
- xfree (iter->out);
- xfree (iter);
+ /* Open a descriptor converting from CHARSET to
+ INTERMEDIATE_ENCODING; a failure to open it is reported through
+ perror_with_name. */
+ m_desc = iconv_open (INTERMEDIATE_ENCODING, charset);
+ if (m_desc == (iconv_t) -1)
+ perror_with_name (_("Converting character sets"));
}
-struct cleanup *
-make_cleanup_wchar_iterator (struct wchar_iterator *iter)
+/* Close the conversion descriptor, unless it holds the (iconv_t) -1
+ value that iconv_open returns on failure. */
+wchar_iterator::~wchar_iterator ()
{
- return make_cleanup (do_cleanup_iterator, iter);
+ if (m_desc != (iconv_t) -1)
+ iconv_close (m_desc);
}
int
-wchar_iterate (struct wchar_iterator *iter,
- enum wchar_iterate_result *out_result,
- gdb_wchar_t **out_chars,
- const gdb_byte **ptr,
- size_t *len)
+wchar_iterator::iterate (enum wchar_iterate_result *out_result,
+ gdb_wchar_t **out_chars,
+ const gdb_byte **ptr,
+ size_t *len)
{
size_t out_request;
invalid input sequence -- but we want to reliably report this to
our caller so it can emit an escape sequence. */
out_request = 1;
- while (iter->bytes > 0)
+ while (m_bytes > 0)
{
- char *outptr = (char *) &iter->out[0];
- char *orig_inptr = iter->input;
- size_t orig_in = iter->bytes;
+ ICONV_CONST char *inptr = (ICONV_CONST char *) m_input;
+ char *outptr = (char *) m_out.data ();
+ const gdb_byte *orig_inptr = m_input;
+ size_t orig_in = m_bytes;
size_t out_avail = out_request * sizeof (gdb_wchar_t);
size_t num;
- gdb_wchar_t result;
+ size_t r = iconv (m_desc, &inptr, &m_bytes, &outptr, &out_avail);
+
+ m_input = (gdb_byte *) inptr;
- size_t r = iconv (iter->desc,
- (ICONV_CONST char **) &iter->input, &iter->bytes,
- &outptr, &out_avail);
if (r == (size_t) -1)
{
switch (errno)
{
case EILSEQ:
- /* Invalid input sequence. Skip it, and let the caller
- know about it. */
+ /* Invalid input sequence. We still might have
+ converted a character; if so, return it. */
+ if (out_avail < out_request * sizeof (gdb_wchar_t))
+ break;
+
+ /* Otherwise skip the first invalid character, and let
+ the caller know about it. */
*out_result = wchar_iterate_invalid;
- *ptr = iter->input;
- *len = iter->width;
- iter->input += iter->width;
- iter->bytes -= iter->width;
+ *ptr = m_input;
+ *len = m_width;
+ m_input += m_width;
+ m_bytes -= m_width;
return 0;
case E2BIG:
break;
++out_request;
- if (out_request > iter->out_size)
- {
- iter->out_size = out_request;
- iter->out = xrealloc (iter->out,
- out_request * sizeof (gdb_wchar_t));
- }
+ if (out_request > m_out.size ())
+ m_out.resize (out_request);
continue;
case EINVAL:
/* Incomplete input sequence. Let the caller know, and
arrange for future calls to see EOF. */
*out_result = wchar_iterate_incomplete;
- *ptr = iter->input;
- *len = iter->bytes;
- iter->bytes = 0;
+ *ptr = m_input;
+ *len = m_bytes;
+ m_bytes = 0;
return 0;
default:
- perror_with_name ("Internal error while converting character sets");
+ perror_with_name (_("Internal error while "
+ "converting character sets"));
}
}
/* We converted something. */
num = out_request - out_avail / sizeof (gdb_wchar_t);
*out_result = wchar_iterate_ok;
- *out_chars = iter->out;
+ *out_chars = m_out.data ();
*ptr = orig_inptr;
- *len = orig_in - iter->bytes;
+ *len = orig_in - m_bytes;
return num;
}
return -1;
}
-\f
-/* The charset.c module initialization function. */
+/* A vector of charset names that owns its strings: every element is
+ released with xfree when the vector is cleared or destroyed. */
+struct charset_vector
+{
+ ~charset_vector ()
+ {
+ clear ();
+ }
-extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
+ /* Free every stored string and empty the vector. */
+ void clear ()
+ {
+ for (char *c : charsets)
+ xfree (c);
-typedef char *char_ptr;
-DEF_VEC_P (char_ptr);
+ charsets.clear ();
+ }
+
+ /* The stored names; each one is passed to xfree by clear. */
+ std::vector<char *> charsets;
+};
-static VEC (char_ptr) *charsets;
+static charset_vector charsets;
#ifdef PHONY_ICONV
+/* With the phony iconv only one charset is available: append a copy
+ of GDB_DEFAULT_HOST_CHARSET to the global list, followed by a
+ NULL terminator. */
static void
find_charset_names (void)
{
- VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
- VEC_safe_push (char_ptr, charsets, NULL);
+ charsets.charsets.push_back (xstrdup (GDB_DEFAULT_HOST_CHARSET));
+ charsets.charsets.push_back (NULL);
}
#else /* PHONY_ICONV */
unsigned int i;
for (i = 0; i < count; ++i)
- VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
+ charsets.charsets.push_back (xstrdup (names[i]));
return 0;
}
find_charset_names (void)
{
iconvlist (add_one, NULL);
- VEC_safe_push (char_ptr, charsets, NULL);
+
+ charsets.charsets.push_back (NULL);
}
#else
+/* Decide whether LINE, a line of output from the iconv program, is
+   part of the human readable introduction rather than a list of
+   charset names.  Older iconv programs (e.g. 2.2.2) print that
+   introduction even when stdout is not a tty; newer versions omit
+   it in that case.  Return 1 if LINE should be skipped, 0 if it
+   should be parsed.  */
+
+static int
+ignore_line_p (const char *line)
+{
+  /* Fragments of the introduction text.  A line is ignored when any
+     of them occurs anywhere in it (strstr is used).  The table is
+     NULL-terminated.  */
+  static const char * const intro_fragments[] =
+    {
+      "The following",
+      "not necessarily",
+      "the FROM and TO",
+      "listed with several",
+      NULL
+    };
+  const char * const *fragment = intro_fragments;
+
+  while (*fragment != NULL)
+    {
+      if (strstr (line, *fragment) != NULL)
+        return 1;
+      ++fragment;
+    }
+
+  return 0;
+}
+
+/* Fill the global charset list by running the external iconv
+ program with "-l" and parsing its output. The program is run
+ with LANGUAGE and LC_ALL forced to "C" so that the introduction
+ text can be recognized by ignore_line_p. If the program cannot
+ be run, or does not exit with status zero, the whole list is
+ cleared; otherwise a NULL terminator is appended to it. */
static void
find_charset_names (void)
{
- FILE *in;
+ struct pex_obj *child;
+ const char *args[3];
+ int err, status;
+ int fail = 1;
+ int flags;
+ gdb_environ iconv_env = gdb_environ::from_host_environ ();
+ char *iconv_program;
+
+ /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
+ not a tty. We need to recognize it and ignore it. This text is
+ subject to translation, so force LANGUAGE=C. */
+ iconv_env.set ("LANGUAGE", "C");
+ iconv_env.set ("LC_ALL", "C");
+
+ child = pex_init (PEX_USE_PIPES, "iconv", NULL);
+
+#ifdef ICONV_BIN
+ {
+ std::string iconv_dir = relocate_gdb_directory (ICONV_BIN,
+ ICONV_BIN_RELOCATABLE);
+ iconv_program = concat (iconv_dir.c_str(), SLASH_STRING, "iconv", NULL);
+ }
+#else
+ iconv_program = xstrdup ("iconv");
+#endif
+ args[0] = iconv_program;
+ args[1] = "-l";
+ args[2] = NULL;
+ flags = PEX_STDERR_TO_STDOUT;
+#ifndef ICONV_BIN
+ flags |= PEX_SEARCH;
+#endif
+ /* Note that we simply ignore errors here. */
+ if (!pex_run_in_environment (child, flags,
+ args[0], const_cast<char **> (args),
+ iconv_env.envp (),
+ NULL, NULL, &err))
+ {
+ FILE *in = pex_read_output (child, 0);
- in = popen ("iconv -l", "r");
- /* It is ok to ignore errors; we'll fall back on a default. */
- if (!in)
- return;
+ /* POSIX says that iconv -l uses an unspecified format. We
+ parse the glibc and libiconv formats; feel free to add others
+ as needed. */
+
+ while (in != NULL && !feof (in))
+ {
+ /* The size of buf is chosen arbitrarily. */
+ char buf[1024];
+ char *start, *r;
+ int len;
+
+ r = fgets (buf, sizeof (buf), in);
+ if (!r)
+ break;
+ len = strlen (r);
+ if (len <= 3)
+ continue;
+ if (ignore_line_p (r))
+ continue;
+
+ /* Strip off the newline, if there is one. fgets leaves it
+ out when the line did not fit in buf or when the output
+ does not end in a newline; in that case the last
+ character belongs to a name and must be kept. */
+ if (buf[len - 1] == '\n')
+ --len;
+ /* Strip off one or two '/'s. glibc will print lines like
+ "8859_7//", but also "10646-1:1993/UCS4/". */
+ if (buf[len - 1] == '/')
+ --len;
+ if (buf[len - 1] == '/')
+ --len;
+ buf[len] = '\0';
+
+ /* libiconv will print multiple entries per line, separated
+ by spaces. Older iconvs will print multiple entries per
+ line, indented by two spaces, and separated by ", "
+ (i.e. the human readable form). */
+ start = buf;
+ while (1)
+ {
+ int keep_going;
+ char *p;
+
+ /* Skip leading blanks. */
+ for (p = start; *p && *p == ' '; ++p)
+ ;
+ start = p;
+ /* Find the next space, comma, or end-of-line. */
+ for ( ; *p && *p != ' ' && *p != ','; ++p)
+ ;
+ /* Ignore an empty result. */
+ if (p == start)
+ break;
+ keep_going = *p;
+ *p = '\0';
+ charsets.charsets.push_back (xstrdup (start));
+ if (!keep_going)
+ break;
+ /* Skip any extra spaces. */
+ for (start = p + 1; *start && *start == ' '; ++start)
+ ;
+ }
+ }
+
+ if (pex_get_status (child, 1, &status)
+ && WIFEXITED (status) && !WEXITSTATUS (status))
+ fail = 0;
- /* POSIX says that iconv -l uses an unspecified format. We parse
- the glibc format; feel free to add others as needed. */
- while (!feof (in))
- {
- /* The size of buf is chosen arbitrarily. A character set name
- longer than this would not be very nice. */
- char buf[80];
- int len;
- char *r = fgets (buf, sizeof (buf), in);
- if (!r)
- break;
- len = strlen (r);
- if (len <= 3)
- continue;
- if (buf[len - 2] == '/' && buf[len - 3] == '/')
- buf[len - 3] = '\0';
- VEC_safe_push (char_ptr, charsets, xstrdup (buf));
}
- pclose (in);
+ xfree (iconv_program);
+ pex_free (child);
- VEC_safe_push (char_ptr, charsets, NULL);
+ if (fail)
+ {
+ /* Some error occurred, so drop the vector. */
+ charsets.clear ();
+ }
+ else
+ charsets.charsets.push_back (NULL);
}
#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
#endif /* PHONY_ICONV */
+/* The "auto" target charset used by default_auto_charset. */
+static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
+
+/* Return the charset name currently stored in
+ auto_target_charset_name. */
+const char *
+default_auto_charset (void)
+{
+ return auto_target_charset_name;
+}
+
+/* Return the compile-time default wide charset name,
+ GDB_DEFAULT_TARGET_WIDE_CHARSET. */
+const char *
+default_auto_wide_charset (void)
+{
+ return GDB_DEFAULT_TARGET_WIDE_CHARSET;
+}
+
+
+#ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
+/* Macro used for UTF or UCS endianness suffix. */
+#if WORDS_BIGENDIAN
+#define ENDIAN_SUFFIX "BE"
+#else
+#define ENDIAN_SUFFIX "LE"
+#endif
+
+/* GDB cannot handle strings correctly if this size is different. */
+
+gdb_static_assert (sizeof (gdb_wchar_t) == 2 || sizeof (gdb_wchar_t) == 4);
+
+/* intermediate_encoding returns the charset used internally by
+   GDB to convert between target and host encodings.  As the test above
+   compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
+   UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
+   otherwise an error is generated.
+
+   A candidate name is accepted when iconv_open can open a conversion
+   from the host charset to it.  The first accepted name is stored in
+   a function-local static and returned by every later call; the
+   string is allocated once and never freed.  */
+
+const char *
+intermediate_encoding (void)
+{
+  iconv_t desc;
+  static const char *stored_result = NULL;
+  char *result;
+
+  if (stored_result)
+    return stored_result;
+
+  /* First try: UTF-16 or UTF-32, named by the width in bits.  */
+  result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
+                       ENDIAN_SUFFIX);
+  /* Check that the name is supported by iconv_open.  */
+  desc = iconv_open (result, host_charset ());
+  if (desc != (iconv_t) -1)
+    {
+      iconv_close (desc);
+      stored_result = result;
+      return result;
+    }
+  /* Not valid, free the allocated memory.  */
+  xfree (result);
+
+  /* Second try: UCS-2 or UCS-4, named by the width in bytes.  */
+  result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
+                       ENDIAN_SUFFIX);
+  /* Check that the name is supported by iconv_open.  */
+  desc = iconv_open (result, host_charset ());
+  if (desc != (iconv_t) -1)
+    {
+      iconv_close (desc);
+      stored_result = result;
+      return result;
+    }
+  /* Not valid, free the allocated memory.  */
+  xfree (result);
+
+  /* No valid charset found, generate error here.  */
+  error (_("Unable to find a valid charset for string conversions"));
+}
+
+#endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
+
void
_initialize_charset (void)
{
- struct cmd_list_element *new_cmd;
-
- /* The first element is always "auto"; then we skip it for the
- commands where it is not allowed. */
- VEC_safe_push (char_ptr, charsets, "auto");
+ /* The first element is always "auto". */
+ charsets.charsets.push_back (xstrdup ("auto"));
find_charset_names ();
- if (VEC_length (char_ptr, charsets) > 1)
- charset_enum = (const char **) VEC_address (char_ptr, charsets);
+ if (charsets.charsets.size () > 1)
+ charset_enum = (const char **) charsets.charsets.data ();
else
charset_enum = default_charset_names;
#ifndef PHONY_ICONV
#ifdef HAVE_LANGINFO_CODESET
- auto_host_charset_name = nl_langinfo (CODESET);
- target_charset_name = auto_host_charset_name;
-
- set_be_le_names ();
+ /* The result of nl_langinfo may be overwritten later. This may
+ leak a little memory, if the user later changes the host charset,
+ but that doesn't matter much. */
+ auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
+ /* Solaris will return `646' here -- but the Solaris iconv then does
+ not accept this. Darwin (and maybe FreeBSD) may return "" here,
+ which GNU libiconv doesn't like (infinite loop). */
+ if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
+ auto_host_charset_name = "ASCII";
+ auto_target_charset_name = auto_host_charset_name;
+#elif defined (USE_WIN32API)
+ {
+ /* "CP" + x<=5 digits + paranoia. */
+ static char w32_host_default_charset[16];
+
+ snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
+ "CP%d", GetACP());
+ auto_host_charset_name = w32_host_default_charset;
+ auto_target_charset_name = auto_host_charset_name;
+ }
#endif
#endif
add_setshow_enum_cmd ("charset", class_support,
- &charset_enum[1], &host_charset_name, _("\
+ charset_enum, &host_charset_name, _("\
Set the host and target character sets."), _("\
Show the host and target character sets."), _("\
The `host character set' is the one used by the system GDB is running on.\n\
&setlist, &showlist);
add_setshow_enum_cmd ("target-charset", class_support,
- &charset_enum[1], &target_charset_name, _("\
+ charset_enum, &target_charset_name, _("\
Set the target character set."), _("\
Show the target character set."), _("\
The `target character set' is the one used by the program being debugged.\n\
&setlist, &showlist);
add_setshow_enum_cmd ("target-wide-charset", class_support,
- &charset_enum[1], &target_wide_charset_name,
+ charset_enum, &target_wide_charset_name,
_("\
Set the target wide character set."), _("\
Show the target wide character set."), _("\
-The `target wide character set' is the one used by the program being debugged.\n\
-In particular it is the encoding used by `wchar_t'.\n\
+The `target wide character set' is the one used by the program being debugged.\
+\nIn particular it is the encoding used by `wchar_t'.\n\
GDB translates characters and strings between the host and target\n\
character sets as needed.\n\
To see a list of the character sets GDB supports, type\n\