/* Wide characters for gdb
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009-2020 Free Software Foundation, Inc.
This file is part of GDB.
#ifndef GDB_WCHAR_H
#define GDB_WCHAR_H
-/* If this host has wchar_t and if iconv is available (perhaps via GNU
- libiconv), then we arrange to use those. Otherwise, we provide a
- phony iconv which only handles a single character set, and we
+/* We handle three different modes here.
+
+ Capable systems have the full suite: wchar_t support and iconv
+ (perhaps via GNU libiconv). On these machines, full functionality
+ is available. Note that full functionality depends on our being
+ able to convert from an arbitrary encoding to wchar_t. In practice
+ this means we look for __STDC_ISO_10646__ (where we know the name
+ of the wchar_t encoding) or a new-enough GNU libiconv (where we can
+ use "wchar_t" as the encoding name).
+
+ DJGPP is known to have libiconv but not wchar_t support. On
+ systems like this, we use the narrow character functions. The full
+ functionality is available to the user, but many characters (those
+ outside the narrow range) will be displayed as escapes.
+
+ Finally, some systems do not have iconv, or are really broken
+ (e.g., Solaris, which almost has all of this working, but where
+ just enough is broken to make it too hard to use). Here we provide
+ a phony iconv which only handles a single character set, and we
provide wrappers for the wchar_t functionality we use. */
-#if defined(HAVE_ICONV) && defined(HAVE_WCHAR_H)
+
+#if defined (HAVE_ICONV)
#include <iconv.h>
+#else
+/* The host has no iconv, so a phony iconv is provided instead.
+ This define is used elsewhere so we don't need to duplicate the
+ same checking logic in multiple places. */
+#define PHONY_ICONV
+#endif /* defined (HAVE_ICONV) */
+
#include <wchar.h>
#include <wctype.h>
+/* We use "btowc" as a sentinel to detect functioning wchar_t support.
+ We check for either __STDC_ISO_10646__ or a new-enough libiconv in
+ order to ensure we can convert to and from wchar_t. We choose
+ libiconv version 0x108 because it is the first version with
+ iconvlist.
+
+ When the test succeeds, gdb_wchar_t and gdb_wint_t are simply the
+ host's wchar_t and wint_t, and LCST (X) pastes an "L" prefix onto X
+ to form a wide literal. */
+#if defined (HAVE_ICONV) && defined (HAVE_BTOWC) \
+ && (defined (__STDC_ISO_10646__) \
+ || (defined (_LIBICONV_VERSION) && _LIBICONV_VERSION >= 0x108))
+
typedef wchar_t gdb_wchar_t;
typedef wint_t gdb_wint_t;
#define LCST(X) L ## X
+/* If __STDC_ISO_10646__ is defined, then the host wchar_t is UCS-4.
+ We exploit this fact in the hope that there are hosts that define
+ this but which do not support "wchar_t" as an encoding argument to
+ iconv_open. We put the endianness into the encoding name to avoid
+ hosts that emit a BOM when the unadorned name is used.
+
+ In this case INTERMEDIATE_ENCODING expands to a call of
+ intermediate_encoding, which is only declared here.
+ NOTE(review): USE_INTERMEDIATE_ENCODING_FUNCTION presumably tells
+ the implementation file to define that function -- confirm. */
+#if defined (__STDC_ISO_10646__)
+#define USE_INTERMEDIATE_ENCODING_FUNCTION
+#define INTERMEDIATE_ENCODING intermediate_encoding ()
+const char *intermediate_encoding (void);
+
+#elif defined (_LIBICONV_VERSION) && _LIBICONV_VERSION >= 0x108
+#define INTERMEDIATE_ENCODING "wchar_t"
#else
+/* This shouldn't happen, because the earlier #if should have filtered
+ out this case. */
+#error "Neither __STDC_ISO_10646__ nor _LIBICONV_VERSION defined"
+#endif /* defined (__STDC_ISO_10646__) */
+
+#else /* No usable wchar_t conversion; fall back to narrow characters. */
+
+/* If we got here and have wchar_t support (HAVE_BTOWC), then iconv
+ is either missing or not known to convert to and from wchar_t, so
+ we might be on a system with some problem. So, we just disable
+ everything. If PHONY_ICONV was already defined because iconv is
+ missing, this repeats the same empty definition, which is a
+ permitted redefinition. */
+#if defined (HAVE_BTOWC)
+#define PHONY_ICONV
+#endif /* defined (HAVE_BTOWC) */
typedef char gdb_wchar_t; /* Narrow characters stand in for wide ones. */
typedef int gdb_wint_t;
#define LCST(X) X /* Literals are left as ordinary narrow literals. */
-/* This define is used elsewhere so we don't need to duplicate the
- same checking logic in multiple places. */
-#define PHONY_ICONV
+/* If we are using the narrow character set, we want to use the host
+ narrow encoding as our intermediate encoding. However, if we are
+ also providing a phony iconv, we might as well just stick with
+ "wchar_t".
+
+ PHONY_ICONV is left undefined here only when iconv is available
+ but btowc is not.
+ NOTE(review): host_charset is not declared in the visible part of
+ this header; presumably users of INTERMEDIATE_ENCODING must have
+ its declaration in scope -- confirm. */
+#ifdef PHONY_ICONV
+#define INTERMEDIATE_ENCODING "wchar_t"
+#else
+#define INTERMEDIATE_ENCODING host_charset ()
+#endif /* PHONY_ICONV */
-#endif /* defined(HAVE_ICONV) && defined(HAVE_WCHAR_H) */
+#endif /* Wide-character support test. */
#endif /* GDB_WCHAR_H */