Fix build errors with clang in gdb.compile/compile-cplus.c

[deliverable/binutils-gdb.git] / gdb / charset.c
diff --git a/gdb/charset.c b/gdb/charset.c

index 4bc8894f15dcee75c8bfd1af18a6e56a099a23aa..4e459c2b45d8a85144e9320f14c688524acc8a5b 100644 (file)
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -1,6 +1,6 @@
  /* Character set conversion support for GDB.
  
-   Copyright (C) 2001, 2003, 2007, 2008, 2009 Free Software Foundation, Inc.
+   Copyright (C) 2001-2020 Free Software Foundation, Inc.
  
     This file is part of GDB.
  
@@ -20,16 +20,17 @@
  #include "defs.h"
  #include "charset.h"
  #include "gdbcmd.h"
-#include "gdb_assert.h"
  #include "gdb_obstack.h"
-#include "gdb_wait.h"
+#include "gdbsupport/gdb_wait.h"
  #include "charset-list.h"
-#include "vec.h"
-
-#include <stddef.h>
-#include "gdb_string.h"
+#include "gdbsupport/environ.h"
+#include "arch-utils.h"
+#include "gdbsupport/gdb_vecs.h"
  #include <ctype.h>
  
+#ifdef USE_WIN32API
+#include <windows.h>
+#endif
  \f
  /* How GDB's character set support works
  
@@ -75,83 +76,90 @@
     arrange for there to be a single available character set.  */
  
  #undef GDB_DEFAULT_HOST_CHARSET
-#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
+#ifdef USE_WIN32API
+# define GDB_DEFAULT_HOST_CHARSET "CP1252"
+#else
+# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
+#endif
+#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET 
+#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
  #undef DEFAULT_CHARSET_NAMES
  #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
  
  #undef iconv_t
  #define iconv_t int
  #undef iconv_open
+#define iconv_open phony_iconv_open
  #undef iconv
+#define iconv phony_iconv
  #undef iconv_close
+#define iconv_close phony_iconv_close
  
  #undef ICONV_CONST
  #define ICONV_CONST const
  
-/* Some systems don't have EILSEQ, so we define it here, but not as
-   EINVAL, because callers of `iconv' want to distinguish EINVAL and
-   EILSEQ.  This is what iconv.h from libiconv does as well.  Note
-   that wchar.h may also define EILSEQ, so this needs to be after we
-   include wchar.h, which happens in defs.h through gdb_wchar.h.  */
-#ifndef EILSEQ
-#define EILSEQ ENOENT
-#endif
+/* We allow conversions from UTF-32, wchar_t, and the host charset.
+   We allow conversions to wchar_t and the host charset.
+   Return 1 if we are converting from UTF-32BE, 2 if from UTF-32LE,
+   0 otherwise.  This is used as a flag in calls to iconv.  */
  
-iconv_t
-iconv_open (const char *to, const char *from)
+static iconv_t
+phony_iconv_open (const char *to, const char *from)
  {
-  /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
-     We allow conversions to wchar_t and the host charset.  */
-  if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
-      && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
-    return -1;
    if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
      return -1;
  
-  /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
-     used as a flag in calls to iconv.  */
-  return !strcmp (from, "UTF-32BE");
+  if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32"))
+    return 1;
+
+  if (!strcmp (from, "UTF-32LE"))
+    return 2;
+
+  if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
+    return -1;
+
+  return 0;
  }
  
-int
-iconv_close (iconv_t arg)
+static int
+phony_iconv_close (iconv_t arg)
  {
    return 0;
  }
  
-size_t
-iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
-       char **outbuf, size_t *outbytesleft)
+static size_t
+phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
+            char **outbuf, size_t *outbytesleft)
  {
    if (utf_flag)
      {
+      enum bfd_endian endian
+       = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
        while (*inbytesleft >= 4)
         {
-         size_t j;
-         unsigned long c = 0;
-
-         for (j = 0; j < 4; ++j)
-           {
-             c <<= 8;
-             c += (*inbuf)[j] & 0xff;
-           }
+         unsigned long c
+           = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian);
  
           if (c >= 256)
             {
               errno = EILSEQ;
               return -1;
             }
+         if (*outbytesleft < 1)
+           {
+             errno = E2BIG;
+             return -1;
+           }
           **outbuf = c & 0xff;
           ++*outbuf;
           --*outbytesleft;
  
-         ++*inbuf;
+         *inbuf += 4;
           *inbytesleft -= 4;
         }
-      if (*inbytesleft < 4)
+      if (*inbytesleft)
         {
+         /* Partial sequence on input.  */
           errno = EINVAL;
           return -1;
         }
@@ -161,6 +169,7 @@ iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
        /* In all other cases we simply copy input bytes to the
          output.  */
        size_t amt = *inbytesleft;
+
        if (amt > *outbytesleft)
         amt = *outbytesleft;
        memcpy (*outbuf, *inbuf, amt);
@@ -168,12 +177,11 @@ iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
        *outbuf += amt;
        *inbytesleft -= amt;
        *outbytesleft -= amt;
-    }
-
-  if (*inbytesleft)
-    {
-      errno = E2BIG;
-      return -1;
+      if (*inbytesleft)
+       {
+         errno = E2BIG;
+         return -1;
+       }
      }
  
    /* The number of non-reversible conversions -- but they were all
@@ -181,8 +189,28 @@ iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
    return 0;
  }
  
-#endif
+#else /* PHONY_ICONV */
+
+/* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it
+   to ENOENT, while gnulib defines it to a different value.  Always
+   map ENOENT to gnulib's EILSEQ, leaving callers agnostic.  */
  
+static size_t
+gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft,
+          char **outbuf, size_t *outbytesleft)
+{
+  size_t ret;
+
+  ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft);
+  if (errno == ENOENT)
+    errno = EILSEQ;
+  return ret;
+}
+
+#undef iconv
+#define iconv gdb_iconv
+
+#endif /* PHONY_ICONV */
  
  \f
  /* The global lists of character sets and translations.  */
@@ -211,22 +239,36 @@ show_host_charset_name (struct ui_file *file, int from_tty,
      fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
  }
  
-static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
+static const char *target_charset_name = "auto";
  static void
  show_target_charset_name (struct ui_file *file, int from_tty,
                           struct cmd_list_element *c, const char *value)
  {
-  fprintf_filtered (file, _("The target character set is \"%s\".\n"),
-                   value);
+  if (!strcmp (value, "auto"))
+    fprintf_filtered (file,
+                     _("The target character set is \"auto; "
+                       "currently %s\".\n"),
+                     gdbarch_auto_charset (get_current_arch ()));
+  else
+    fprintf_filtered (file, _("The target character set is \"%s\".\n"),
+                     value);
  }
  
-static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;
+static const char *target_wide_charset_name = "auto";
  static void
-show_target_wide_charset_name (struct ui_file *file, int from_tty,
-                              struct cmd_list_element *c, const char *value)
+show_target_wide_charset_name (struct ui_file *file, 
+                              int from_tty,
+                              struct cmd_list_element *c, 
+                              const char *value)
  {
-  fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
-                   value);
+  if (!strcmp (value, "auto"))
+    fprintf_filtered (file,
+                     _("The target wide character set is \"auto; "
+                       "currently %s\".\n"),
+                     gdbarch_auto_wide_charset (get_current_arch ()));
+  else
+    fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
+                     value);
  }
  
  static const char *default_charset_names[] =
@@ -243,21 +285,38 @@ static const char **charset_enum;
  static const char *target_wide_charset_be_name;
  static const char *target_wide_charset_le_name;
  
-/* A helper function for validate which sets the target wide big- and
-   little-endian character set names, if possible.  */
+/* The architecture for which the BE- and LE-names are valid.  */
+static struct gdbarch *be_le_arch;
+
+/* A helper function which sets the target wide big- and little-endian
+   character set names, if possible.  */
  
  static void
-set_be_le_names (void)
+set_be_le_names (struct gdbarch *gdbarch)
  {
+  if (be_le_arch == gdbarch)
+    return;
+  be_le_arch = gdbarch;
+
+#ifdef PHONY_ICONV
+  /* Match the wide charset names recognized by phony_iconv_open.  */
+  target_wide_charset_le_name = "UTF-32LE";
+  target_wide_charset_be_name = "UTF-32BE";
+#else
    int i, len;
+  const char *target_wide;
  
    target_wide_charset_le_name = NULL;
    target_wide_charset_be_name = NULL;
  
-  len = strlen (target_wide_charset_name);
+  target_wide = target_wide_charset_name;
+  if (!strcmp (target_wide, "auto"))
+    target_wide = gdbarch_auto_wide_charset (gdbarch);
+
+  len = strlen (target_wide);
    for (i = 0; charset_enum[i]; ++i)
      {
-      if (strncmp (target_wide_charset_name, charset_enum[i], len))
+      if (strncmp (target_wide, charset_enum[i], len))
         continue;
        if ((charset_enum[i][len] == 'B'
            || charset_enum[i][len] == 'L')
@@ -270,74 +329,84 @@ set_be_le_names (void)
             target_wide_charset_le_name = charset_enum[i];
         }
      }
+# endif  /* PHONY_ICONV */
  }
  
  /* 'Set charset', 'set host-charset', 'set target-charset', 'set
     target-wide-charset', 'set charset' sfunc's.  */
  
  static void
-validate (void)
+validate (struct gdbarch *gdbarch)
  {
    iconv_t desc;
    const char *host_cset = host_charset ();
+  const char *target_cset = target_charset (gdbarch);
+  const char *target_wide_cset = target_wide_charset_name;
+
+  if (!strcmp (target_wide_cset, "auto"))
+    target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
  
-  desc = iconv_open (target_wide_charset_name, host_cset);
+  desc = iconv_open (target_wide_cset, host_cset);
    if (desc == (iconv_t) -1)
-    error ("Cannot convert between character sets `%s' and `%s'",
-          target_wide_charset_name, host_cset);
+    error (_("Cannot convert between character sets `%s' and `%s'"),
+          target_wide_cset, host_cset);
    iconv_close (desc);
  
-  desc = iconv_open (target_charset_name, host_cset);
+  desc = iconv_open (target_cset, host_cset);
    if (desc == (iconv_t) -1)
-    error ("Cannot convert between character sets `%s' and `%s'",
-          target_charset_name, host_cset);
+    error (_("Cannot convert between character sets `%s' and `%s'"),
+          target_cset, host_cset);
    iconv_close (desc);
  
-  set_be_le_names ();
+  /* Clear the cache.  */
+  be_le_arch = NULL;
  }
  
  /* This is the sfunc for the 'set charset' command.  */
  static void
-set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
+set_charset_sfunc (const char *charset, int from_tty, 
+                  struct cmd_list_element *c)
  {
-  /* CAREFUL: set the target charset here as well. */
+  /* CAREFUL: set the target charset here as well.  */
    target_charset_name = host_charset_name;
-  validate ();
+  validate (get_current_arch ());
  }
  
  /* 'set host-charset' command sfunc.  We need a wrapper here because
     the function needs to have a specific signature.  */
  static void
-set_host_charset_sfunc (char *charset, int from_tty,
+set_host_charset_sfunc (const char *charset, int from_tty,
                         struct cmd_list_element *c)
  {
-  validate ();
+  validate (get_current_arch ());
  }
  
  /* Wrapper for the 'set target-charset' command.  */
  static void
-set_target_charset_sfunc (char *charset, int from_tty,
+set_target_charset_sfunc (const char *charset, int from_tty,
                           struct cmd_list_element *c)
  {
-  validate ();
+  validate (get_current_arch ());
  }
  
  /* Wrapper for the 'set target-wide-charset' command.  */
  static void
-set_target_wide_charset_sfunc (char *charset, int from_tty,
+set_target_wide_charset_sfunc (const char *charset, int from_tty,
                                struct cmd_list_element *c)
  {
-  validate ();
+  validate (get_current_arch ());
  }
  
  /* sfunc for the 'show charset' command.  */
  static void
-show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
+show_charset (struct ui_file *file, int from_tty, 
+             struct cmd_list_element *c,
               const char *name)
  {
    show_host_charset_name (file, from_tty, c, host_charset_name);
    show_target_charset_name (file, from_tty, c, target_charset_name);
-  show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
+  show_target_wide_charset_name (file, from_tty, c, 
+                                target_wide_charset_name);
  }
  
  \f
@@ -352,14 +421,19 @@ host_charset (void)
  }
  
  const char *
-target_charset (void)
+target_charset (struct gdbarch *gdbarch)
  {
+  if (!strcmp (target_charset_name, "auto"))
+    return gdbarch_auto_charset (gdbarch);
    return target_charset_name;
  }
  
  const char *
-target_wide_charset (enum bfd_endian byte_order)
+target_wide_charset (struct gdbarch *gdbarch)
  {
+  enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
+
+  set_be_le_names (gdbarch);
    if (byte_order == BFD_ENDIAN_BIG)
      {
        if (target_wide_charset_be_name)
@@ -371,6 +445,9 @@ target_wide_charset (enum bfd_endian byte_order)
         return target_wide_charset_le_name;
      }
  
+  if (!strcmp (target_wide_charset_name, "auto"))
+    return gdbarch_auto_wide_charset (gdbarch);
+
    return target_wide_charset_name;
  }
  
@@ -403,14 +480,32 @@ host_hex_value (char c)
  \f
  /* Public character management functions.  */
  
-/* A cleanup function which is run to close an iconv descriptor.  */
-
-static void
-cleanup_iconv (void *p)
+class iconv_wrapper
  {
-  iconv_t *descp = p;
-  iconv_close (*descp);
-}
+public:
+
+  iconv_wrapper (const char *to, const char *from)
+  {
+    m_desc = iconv_open (to, from);
+    if (m_desc == (iconv_t) -1)
+      perror_with_name (_("Converting character sets"));
+  }
+
+  ~iconv_wrapper ()
+  {
+    iconv_close (m_desc);
+  }
+
+  size_t convert (ICONV_CONST char **inp, size_t *inleft, char **outp,
+                 size_t *outleft)
+  {
+    return iconv (m_desc, inp, inleft, outp, outleft);
+  }
+
+private:
+
+  iconv_t m_desc;
+};
  
  void
  convert_between_encodings (const char *from, const char *to,
@@ -418,10 +513,8 @@ convert_between_encodings (const char *from, const char *to,
                            int width, struct obstack *output,
                            enum transliterations translit)
  {
-  iconv_t desc;
-  struct cleanup *cleanups;
    size_t inleft;
-  char *inp;
+  ICONV_CONST char *inp;
    unsigned int space_request;
  
    /* Often, the host and target charsets will be the same.  */
@@ -431,13 +524,10 @@ convert_between_encodings (const char *from, const char *to,
        return;
      }
  
-  desc = iconv_open (to, from);
-  if (desc == (iconv_t) -1)
-    perror_with_name ("Converting character sets");
-  cleanups = make_cleanup (cleanup_iconv, &desc);
+  iconv_wrapper desc (to, from);
  
    inleft = num_bytes;
-  inp = (char *) bytes;
+  inp = (ICONV_CONST char *) bytes;
  
    space_request = num_bytes;
  
@@ -450,14 +540,14 @@ convert_between_encodings (const char *from, const char *to,
        old_size = obstack_object_size (output);
        obstack_blank (output, space_request);
  
-      outp = obstack_base (output) + old_size;
+      outp = (char *) obstack_base (output) + old_size;
        outleft = space_request;
  
-      r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
+      r = desc.convert (&inp, &inleft, &outp, &outleft);
  
        /* Now make sure that the object on the obstack only includes
          bytes we have converted.  */
-      obstack_blank (output, - (int) outleft);
+      obstack_blank_fast (output, -(ssize_t) outleft);
  
        if (r == (size_t) -1)
         {
@@ -469,8 +559,8 @@ convert_between_encodings (const char *from, const char *to,
  
                 /* Invalid input sequence.  */
                 if (translit == translit_none)
-                 error (_("Could not convert character to `%s' character set"),
-                        to);
+                 error (_("Could not convert character "
+                          "to `%s' character set"), to);
  
                 /* We emit escape sequence for the bytes, skip them,
                    and try again.  */
@@ -478,7 +568,7 @@ convert_between_encodings (const char *from, const char *to,
                   {
                     char octal[5];
  
-                   sprintf (octal, "\\%.3o", *inp & 0xff);
+                   xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
                     obstack_grow_str (output, octal);
  
                     ++inp;
@@ -500,81 +590,39 @@ convert_between_encodings (const char *from, const char *to,
               break;
  
             default:
-             perror_with_name ("Internal error while converting character sets");
+             perror_with_name (_("Internal error while "
+                                 "converting character sets"));
             }
         }
      }
-
-  do_cleanups (cleanups);
  }
  
  \f
  
-/* An iterator that returns host wchar_t's from a target string.  */
-struct wchar_iterator
-{
-  /* The underlying iconv descriptor.  */
-  iconv_t desc;
-
-  /* The input string.  This is updated as convert characters.  */
-  char *input;
-  /* The number of bytes remaining in the input.  */
-  size_t bytes;
-
-  /* The width of an input character.  */
-  size_t width;
-
-  /* The output buffer and its size.  */
-  gdb_wchar_t *out;
-  size_t out_size;
-};
-
  /* Create a new iterator.  */
-struct wchar_iterator *
-make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
-                    size_t width)
-{
-  struct wchar_iterator *result;
-  iconv_t desc;
-
-  desc = iconv_open (INTERMEDIATE_ENCODING, charset);
-  if (desc == (iconv_t) -1)
-    perror_with_name ("Converting character sets");
-
-  result = XNEW (struct wchar_iterator);
-  result->desc = desc;
-  result->input = (char *) input;
-  result->bytes = bytes;
-  result->width = width;
-
-  result->out = XNEW (gdb_wchar_t);
-  result->out_size = 1;
-
-  return result;
-}
-
-static void
-do_cleanup_iterator (void *p)
+wchar_iterator::wchar_iterator (const gdb_byte *input, size_t bytes, 
+                               const char *charset, size_t width)
+: m_input (input),
+  m_bytes (bytes),
+  m_width (width),
+  m_out (1)
  {
-  struct wchar_iterator *iter = p;
-
-  iconv_close (iter->desc);
-  xfree (iter->out);
-  xfree (iter);
+  m_desc = iconv_open (INTERMEDIATE_ENCODING, charset);
+  if (m_desc == (iconv_t) -1)
+    perror_with_name (_("Converting character sets"));
  }
  
-struct cleanup *
-make_cleanup_wchar_iterator (struct wchar_iterator *iter)
+wchar_iterator::~wchar_iterator ()
  {
-  return make_cleanup (do_cleanup_iterator, iter);
+  if (m_desc != (iconv_t) -1)
+    iconv_close (m_desc);
  }
  
  int
-wchar_iterate (struct wchar_iterator *iter,
-              enum wchar_iterate_result *out_result,
-              gdb_wchar_t **out_chars,
-              const gdb_byte **ptr,
-              size_t *len)
+wchar_iterator::iterate (enum wchar_iterate_result *out_result,
+                        gdb_wchar_t **out_chars,
+                        const gdb_byte **ptr,
+                        size_t *len)
  {
    size_t out_request;
  
@@ -584,30 +632,35 @@ wchar_iterate (struct wchar_iterator *iter,
       invalid input sequence -- but we want to reliably report this to
       our caller so it can emit an escape sequence.  */
    out_request = 1;
-  while (iter->bytes > 0)
+  while (m_bytes > 0)
      {
-      char *outptr = (char *) &iter->out[0];
-      char *orig_inptr = iter->input;
-      size_t orig_in = iter->bytes;
+      ICONV_CONST char *inptr = (ICONV_CONST char *) m_input;
+      char *outptr = (char *) m_out.data ();
+      const gdb_byte *orig_inptr = m_input;
+      size_t orig_in = m_bytes;
        size_t out_avail = out_request * sizeof (gdb_wchar_t);
        size_t num;
-      gdb_wchar_t result;
+      size_t r = iconv (m_desc, &inptr, &m_bytes, &outptr, &out_avail);
+
+      m_input = (gdb_byte *) inptr;
  
-      size_t r = iconv (iter->desc,
-                       (ICONV_CONST char **) &iter->input, &iter->bytes,
-                       &outptr, &out_avail);
        if (r == (size_t) -1)
         {
           switch (errno)
             {
             case EILSEQ:
-             /* Invalid input sequence.  Skip it, and let the caller
-                know about it.  */
+             /* Invalid input sequence.  We still might have
+                converted a character; if so, return it.  */
+             if (out_avail < out_request * sizeof (gdb_wchar_t))
+               break;
+             
+             /* Otherwise skip the first invalid character, and let
+                the caller know about it.  */
               *out_result = wchar_iterate_invalid;
-             *ptr = iter->input;
-             *len = iter->width;
-             iter->input += iter->width;
-             iter->bytes -= iter->width;
+             *ptr = m_input;
+             *len = m_width;
+             m_input += m_width;
+             m_bytes -= m_width;
               return 0;
  
             case E2BIG:
@@ -618,34 +671,31 @@ wchar_iterate (struct wchar_iterator *iter,
                 break;
  
               ++out_request;
-             if (out_request > iter->out_size)
-               {
-                 iter->out_size = out_request;
-                 iter->out = xrealloc (iter->out,
-                                       out_request * sizeof (gdb_wchar_t));
-               }
+             if (out_request > m_out.size ())
+               m_out.resize (out_request);
               continue;
  
             case EINVAL:
               /* Incomplete input sequence.  Let the caller know, and
                  arrange for future calls to see EOF.  */
               *out_result = wchar_iterate_incomplete;
-             *ptr = iter->input;
-             *len = iter->bytes;
-             iter->bytes = 0;
+             *ptr = m_input;
+             *len = m_bytes;
+             m_bytes = 0;
               return 0;
  
             default:
-             perror_with_name ("Internal error while converting character sets");
+             perror_with_name (_("Internal error while "
+                                 "converting character sets"));
             }
         }
  
        /* We converted something.  */
        num = out_request - out_avail / sizeof (gdb_wchar_t);
        *out_result = wchar_iterate_ok;
-      *out_chars = iter->out;
+      *out_chars = m_out.data ();
        *ptr = orig_inptr;
-      *len = orig_in - iter->bytes;
+      *len = orig_in - m_bytes;
        return num;
      }
  
@@ -654,23 +704,33 @@ wchar_iterate (struct wchar_iterator *iter,
    return -1;
  }
  
-\f
-/* The charset.c module initialization function.  */
+struct charset_vector
+{
+  ~charset_vector ()
+  {
+    clear ();
+  }
  
-extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
+  void clear ()
+  {
+    for (char *c : charsets)
+      xfree (c);
  
-typedef char *char_ptr;
-DEF_VEC_P (char_ptr);
+    charsets.clear ();
+  }
  
-static VEC (char_ptr) *charsets;
+  std::vector<char *> charsets;
+};
+
+static charset_vector charsets;
  
  #ifdef PHONY_ICONV
  
  static void
  find_charset_names (void)
  {
-  VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
-  VEC_safe_push (char_ptr, charsets, NULL);
+  charsets.charsets.push_back (xstrdup (GDB_DEFAULT_HOST_CHARSET));
+  charsets.charsets.push_back (NULL);
  }
  
  #else /* PHONY_ICONV */
@@ -691,7 +751,7 @@ add_one (unsigned int count, const char *const *names, void *data)
    unsigned int i;
  
    for (i = 0; i < count; ++i)
-    VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
+    charsets.charsets.push_back (xstrdup (names[i]));
  
    return 0;
  }
@@ -700,39 +760,95 @@ static void
  find_charset_names (void)
  {
    iconvlist (add_one, NULL);
-  VEC_safe_push (char_ptr, charsets, NULL);
+
+  charsets.charsets.push_back (NULL);
  }
  
  #else
  
+/* Return non-zero if LINE (output from iconv) should be ignored.
+   Older iconv programs (e.g. 2.2.2) include the human readable
+   introduction even when stdout is not a tty.  Newer versions omit
+   the intro if stdout is not a tty.  */
+
+static int
+ignore_line_p (const char *line)
+{
+  /* This table is used to filter the output.  If this text appears
+     anywhere in the line, it is ignored (strstr is used).  */
+  static const char * const ignore_lines[] =
+    {
+      "The following",
+      "not necessarily",
+      "the FROM and TO",
+      "listed with several",
+      NULL
+    };
+  int i;
+
+  for (i = 0; ignore_lines[i] != NULL; ++i)
+    {
+      if (strstr (line, ignore_lines[i]) != NULL)
+       return 1;
+    }
+
+  return 0;
+}
+
  static void
  find_charset_names (void)
  {
    struct pex_obj *child;
-  char *args[3];
+  const char *args[3];
    int err, status;
    int fail = 1;
-
-  child = pex_init (0, "iconv", NULL);
-
-  args[0] = "iconv";
+  int flags;
+  gdb_environ iconv_env = gdb_environ::from_host_environ ();
+  char *iconv_program;
+
+  /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
+     not a tty.  We need to recognize it and ignore it.  This text is
+     subject to translation, so force LANGUAGE=C.  */
+  iconv_env.set ("LANGUAGE", "C");
+  iconv_env.set ("LC_ALL", "C");
+
+  child = pex_init (PEX_USE_PIPES, "iconv", NULL);
+
+#ifdef ICONV_BIN
+  {
+    std::string iconv_dir = relocate_gdb_directory (ICONV_BIN,
+                                                   ICONV_BIN_RELOCATABLE);
+    iconv_program
+      = concat (iconv_dir.c_str(), SLASH_STRING, "iconv", (char *) NULL);
+  }
+#else
+  iconv_program = xstrdup ("iconv");
+#endif
+  args[0] = iconv_program;
    args[1] = "-l";
    args[2] = NULL;
+  flags = PEX_STDERR_TO_STDOUT;
+#ifndef ICONV_BIN
+  flags |= PEX_SEARCH;
+#endif
    /* Note that we simply ignore errors here.  */
-  if (!pex_run (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT, "iconv",
-               args, NULL, NULL, &err))
+  if (!pex_run_in_environment (child, flags,
+                              args[0], const_cast<char **> (args),
+                              iconv_env.envp (),
+                              NULL, NULL, &err))
      {
        FILE *in = pex_read_output (child, 0);
  
        /* POSIX says that iconv -l uses an unspecified format.  We
          parse the glibc and libiconv formats; feel free to add others
          as needed.  */
-      while (!feof (in))
+
+      while (in != NULL && !feof (in))
         {
           /* The size of buf is chosen arbitrarily.  */
           char buf[1024];
           char *start, *r;
-         int len, keep_going;
+         int len;
  
           r = fgets (buf, sizeof (buf), in);
           if (!r)
@@ -740,6 +856,9 @@ find_charset_names (void)
           len = strlen (r);
           if (len <= 3)
             continue;
+         if (ignore_line_p (r))
+           continue;
+
           /* Strip off the newline.  */
           --len;
           /* Strip off one or two '/'s.  glibc will print lines like
@@ -751,22 +870,28 @@ find_charset_names (void)
           buf[len] = '\0';
  
           /* libiconv will print multiple entries per line, separated
-            by spaces.  */
+            by spaces.  Older iconvs will print multiple entries per
+            line, indented by two spaces, and separated by ", "
+            (i.e. the human readable form).  */
           start = buf;
           while (1)
             {
               int keep_going;
               char *p;
  
-             /* Find the next space, or end-of-line.  */
-             for (p = start; *p && *p != ' '; ++p)
+             /* Skip leading blanks.  */
+             for (p = start; *p && *p == ' '; ++p)
+               ;
+             start = p;
+             /* Find the next space, comma, or end-of-line.  */
+             for ( ; *p && *p != ' ' && *p != ','; ++p)
                 ;
               /* Ignore an empty result.  */
               if (p == start)
                 break;
               keep_going = *p;
               *p = '\0';
-             VEC_safe_push (char_ptr, charsets, xstrdup (start));
+             charsets.charsets.push_back (xstrdup (start));
               if (!keep_going)
                 break;
               /* Skip any extra spaces.  */
@@ -781,55 +906,135 @@ find_charset_names (void)
  
      }
  
+  xfree (iconv_program);
    pex_free (child);
  
    if (fail)
      {
        /* Some error occurred, so drop the vector.  */
-      int ix;
-      char *elt;
-      for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
-       xfree (elt);
-      VEC_truncate (char_ptr, charsets, 0);
+      charsets.clear ();
      }
    else
-    VEC_safe_push (char_ptr, charsets, NULL);
+    charsets.charsets.push_back (NULL);
  }
  
  #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
  #endif /* PHONY_ICONV */
  
-void
-_initialize_charset (void)
+/* The "auto" target charset used by default_auto_charset.  */
+static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
+
+const char *
+default_auto_charset (void)
+{
+  return auto_target_charset_name;
+}
+
+const char *
+default_auto_wide_charset (void)
+{
+  return GDB_DEFAULT_TARGET_WIDE_CHARSET;
+}
+
+
+#ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
+/* Macro used for UTF or UCS endianness suffix.  */
+#if WORDS_BIGENDIAN
+#define ENDIAN_SUFFIX "BE"
+#else
+#define ENDIAN_SUFFIX "LE"
+#endif
+
+/* GDB cannot handle strings correctly if this size is different.  */
+
+gdb_static_assert (sizeof (gdb_wchar_t) == 2 || sizeof (gdb_wchar_t) == 4);
+
+/* intermediate_encoding returns the charset used internally by
+   GDB to convert between target and host encodings.  Because the static
+   assertion above holds, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
+   UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
+   otherwise an error is generated.  */
+
+const char *
+intermediate_encoding (void)
  {
-  struct cmd_list_element *new_cmd;
+  iconv_t desc;
+  static const char *stored_result = NULL;
+  char *result;
+
+  if (stored_result)
+    return stored_result;
+  result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
+                      ENDIAN_SUFFIX);
+  /* Check that the name is supported by iconv_open.  */
+  desc = iconv_open (result, host_charset ());
+  if (desc != (iconv_t) -1)
+    {
+      iconv_close (desc);
+      stored_result = result;
+      return result;
+    }
+  /* Not valid, free the allocated memory.  */
+  xfree (result);
+  /* Second try, with UCS-2 type.  */
+  result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
+                      ENDIAN_SUFFIX);
+  /* Check that the name is supported by iconv_open.  */
+  desc = iconv_open (result, host_charset ());
+  if (desc != (iconv_t) -1)
+    {
+      iconv_close (desc);
+      stored_result = result;
+      return result;
+    }
+  /* Not valid, free the allocated memory.  */
+  xfree (result);
+  /* No valid charset found, generate error here.  */
+  error (_("Unable to find a valid charset for string conversions"));
+}
+
+#endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
  
-  /* The first element is always "auto"; then we skip it for the
-     commands where it is not allowed.  */
-  VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
+void _initialize_charset ();
+void
+_initialize_charset ()
+{
+  /* The first element is always "auto".  */
+  charsets.charsets.push_back (xstrdup ("auto"));
    find_charset_names ();
  
-  if (VEC_length (char_ptr, charsets) > 1)
-    charset_enum = (const char **) VEC_address (char_ptr, charsets);
+  if (charsets.charsets.size () > 1)
+    charset_enum = (const char **) charsets.charsets.data ();
    else
      charset_enum = default_charset_names;
  
  #ifndef PHONY_ICONV
  #ifdef HAVE_LANGINFO_CODESET
-  auto_host_charset_name = nl_langinfo (CODESET);
-  /* Solaris will return `646' here -- but the Solaris iconv then
-     does not accept this.  Darwin (and maybe FreeBSD) may return "" here,
+  /* The result of nl_langinfo may be overwritten later.  This may
+     leak a little memory, if the user later changes the host charset,
+     but that doesn't matter much.  */
+  auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
+  /* Solaris will return `646' here -- but the Solaris iconv then does
+     not accept this.  Darwin (and maybe FreeBSD) may return "" here,
       which GNU libiconv doesn't like (infinite loop).  */
    if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
      auto_host_charset_name = "ASCII";
-  target_charset_name = auto_host_charset_name;
-
-  set_be_le_names ();
+  auto_target_charset_name = auto_host_charset_name;
+#elif defined (USE_WIN32API)
+  {
+    /* "CP" + x<=5 digits + paranoia.  */
+    static char w32_host_default_charset[16];
+
+    snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
+             "CP%d", GetACP());
+    auto_host_charset_name = w32_host_default_charset;
+    auto_target_charset_name = auto_host_charset_name;
+  }
  #endif
  #endif
  
    add_setshow_enum_cmd ("charset", class_support,
-                       &charset_enum[1], &host_charset_name, _("\
+                       charset_enum, &host_charset_name, _("\
  Set the host and target character sets."), _("\
  Show the host and target character sets."), _("\
  The `host character set' is the one used by the system GDB is running on.\n\
@@ -857,7 +1062,7 @@ To see a list of the character sets GDB supports, type `set host-charset <TAB>'.
                         &setlist, &showlist);
  
    add_setshow_enum_cmd ("target-charset", class_support,
-                       &charset_enum[1], &target_charset_name, _("\
+                       charset_enum, &target_charset_name, _("\
  Set the target character set."), _("\
  Show the target character set."), _("\
  The `target character set' is the one used by the program being debugged.\n\
@@ -869,12 +1074,12 @@ To see a list of the character sets GDB supports, type `set target-charset'<TAB>
                         &setlist, &showlist);
  
    add_setshow_enum_cmd ("target-wide-charset", class_support,
-                       &charset_enum[1], &target_wide_charset_name,
+                       charset_enum, &target_wide_charset_name,
                         _("\
  Set the target wide character set."), _("\
  Show the target wide character set."), _("\
-The `target wide character set' is the one used by the program being debugged.\n\
-In particular it is the encoding used by `wchar_t'.\n\
+The `target wide character set' is the one used by the program being debugged.\
+\nIn particular it is the encoding used by `wchar_t'.\n\
  GDB translates characters and strings between the host and target\n\
  character sets as needed.\n\
  To see a list of the character sets GDB supports, type\n\