| 1 | /* Character set conversion support for GDB. |
| 2 | Copyright (C) 2001-2019 Free Software Foundation, Inc. |
| 3 | |
| 4 | This file is part of GDB. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify |
| 7 | it under the terms of the GNU General Public License as published by |
| 8 | the Free Software Foundation; either version 3 of the License, or |
| 9 | (at your option) any later version. |
| 10 | |
| 11 | This program is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | GNU General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU General Public License |
| 17 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | #ifndef CHARSET_H |
| 20 | #define CHARSET_H |
| 21 | |
| 22 | #include "gdbsupport/def-vector.h" |
| 23 | |
/* If the target program uses a different character set than the host,
   GDB has some support for translating between the two; GDB converts
   characters and strings to the host character set before displaying
   them, and converts characters and strings appearing in expressions
   entered by the user to the target character set.

   GDB's code pretty much assumes that the host character set is some
   superset of ASCII; there are plenty of ('0' + n) expressions and
   the like.  */
| 33 | |
/* Accessors for the name of the current host or target character set
   (target_wide_charset presumably covers the target's wide
   characters).  The charset module keeps ownership of the returned
   string, so the caller must not free it.  */
extern const char *host_charset ();
extern const char *target_charset (struct gdbarch *gdbarch);
extern const char *target_wide_charset (struct gdbarch *gdbarch);
| 40 | |
/* Selects the kind of transliteration that
   convert_between_encodings performs.  */
enum transliterations
  {
    /* A failure to convert is reported as an error.  */
    translit_none = 0,

    /* Transliterate to a host character.  */
    translit_char = 1
  };
| 50 | |
| 51 | /* Convert between two encodings. |
| 52 | |
| 53 | FROM is the name of the source encoding. |
| 54 | TO is the name of the target encoding. |
| 55 | BYTES holds the bytes to convert; this is assumed to be characters |
| 56 | in the target encoding. |
| 57 | NUM_BYTES is the number of bytes. |
| 58 | WIDTH is the width of a character from the FROM charset, in bytes. |
| 59 | For a variable width encoding, WIDTH should be the size of a "base |
| 60 | character". |
| 61 | OUTPUT is an obstack where the converted data is written. The |
| 62 | caller is responsible for initializing the obstack, and for |
| 63 | destroying the obstack should an error occur. |
| 64 | TRANSLIT specifies how invalid conversions should be handled. */ |
| 65 | |
| 66 | void convert_between_encodings (const char *from, const char *to, |
| 67 | const gdb_byte *bytes, |
| 68 | unsigned int num_bytes, |
| 69 | int width, struct obstack *output, |
| 70 | enum transliterations translit); |
| 71 | |
| 72 | |
/* Status codes through which wchar_iterate reports errors.  */
enum wchar_iterate_result
  {
    /* Ordinary return.  */
    wchar_iterate_ok = 0,

    /* The input sequence was invalid.  */
    wchar_iterate_invalid = 1,

    /* The input ended with an incomplete sequence.  */
    wchar_iterate_incomplete = 2,

    /* EOF.  */
    wchar_iterate_eof = 3
  };
| 85 | |
| 86 | /* An iterator that returns host wchar_t's from a target string. */ |
| 87 | class wchar_iterator |
| 88 | { |
| 89 | public: |
| 90 | |
| 91 | /* Create a new character iterator which returns wchar_t's. INPUT is |
| 92 | the input buffer. BYTES is the number of bytes in the input |
| 93 | buffer. CHARSET is the name of the character set in which INPUT is |
| 94 | encoded. WIDTH is the number of bytes in a base character of |
| 95 | CHARSET. |
| 96 | |
| 97 | This constructor can throw on error. */ |
| 98 | wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset, |
| 99 | size_t width); |
| 100 | |
| 101 | ~wchar_iterator (); |
| 102 | |
| 103 | /* Perform a single iteration of a wchar_t iterator. |
| 104 | |
| 105 | Returns the number of characters converted. A negative result |
| 106 | means that EOF has been reached. A positive result indicates the |
| 107 | number of valid wchar_ts in the result; *OUT_CHARS is updated to |
| 108 | point to the first valid character. |
| 109 | |
| 110 | In all cases aside from EOF, *PTR is set to point to the first |
| 111 | converted target byte. *LEN is set to the number of bytes |
| 112 | converted. |
| 113 | |
| 114 | A zero result means one of several unusual results. *OUT_RESULT is |
| 115 | set to indicate the type of un-ordinary return. |
| 116 | |
| 117 | wchar_iterate_invalid means that an invalid input character was |
| 118 | seen. The iterator is advanced by WIDTH (the argument to |
| 119 | the wchar_iterator constructor) bytes. |
| 120 | |
| 121 | wchar_iterate_incomplete means that an incomplete character was |
| 122 | seen at the end of the input sequence. |
| 123 | |
| 124 | wchar_iterate_eof means that all bytes were successfully |
| 125 | converted. The other output arguments are not set. */ |
| 126 | int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars, |
| 127 | const gdb_byte **ptr, size_t *len); |
| 128 | |
| 129 | private: |
| 130 | |
| 131 | /* The underlying iconv descriptor. */ |
| 132 | #ifdef PHONY_ICONV |
| 133 | int m_desc; |
| 134 | #else |
| 135 | iconv_t m_desc; |
| 136 | #endif |
| 137 | |
| 138 | /* The input string. This is updated as we convert characters. */ |
| 139 | const gdb_byte *m_input; |
| 140 | /* The number of bytes remaining in the input. */ |
| 141 | size_t m_bytes; |
| 142 | |
| 143 | /* The width of an input character. */ |
| 144 | size_t m_width; |
| 145 | |
| 146 | /* The output buffer. */ |
| 147 | gdb::def_vector<gdb_wchar_t> m_out; |
| 148 | }; |
| 149 | |
| 150 | \f |
| 151 | |
| 152 | /* GDB needs to know a few details of its execution character set. |
| 153 | This knowledge is isolated here and in charset.c. */ |
| 154 | |
/* The escape character, given as its numeric character code (27 is
   ESC in ASCII, which GDB assumes the host character set to be a
   superset of).  */
#define HOST_ESCAPE_CHAR 27
| 157 | |
/* Return the control character that corresponds to the letter C
   (a letter such as 'c').  */
extern char host_letter_to_control_character (char c);
| 161 | |
/* Return the numeric value of the hex digit character C; for
   example, 'f' yields 15.  C is assumed to be a valid hex digit.
   Letters are recognized in both upper and lower case.  */
extern int host_hex_value (char c);
| 166 | |
| 167 | #endif /* CHARSET_H */ |