Commit | Line | Data |
---|---|---|
234b45d4 | 1 | /* Character set conversion support for GDB. |
28e7fd62 | 2 | Copyright (C) 2001-2013 Free Software Foundation, Inc. |
234b45d4 KB |
3 | |
4 | This file is part of GDB. | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
a9762ec7 | 8 | the Free Software Foundation; either version 3 of the License, or |
234b45d4 KB |
9 | (at your option) any later version. |
10 | ||
11 | This program is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
a9762ec7 | 17 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
234b45d4 KB |
18 | |
19 | #ifndef CHARSET_H | |
20 | #define CHARSET_H | |
21 | ||
234b45d4 KB |
22 | /* If the target program uses a different character set than the host, |
23 | GDB has some support for translating between the two; GDB converts | |
24 | characters and strings to the host character set before displaying | |
25 | them, and converts characters and strings appearing in expressions | |
26 | entered by the user to the target character set. | |
27 | ||
6c7a06a3 TT |
28 | GDB's code pretty much assumes that the host character set is some |
29 | superset of ASCII; there are plenty of ('0' + n) expressions and | |
30 | the like. */ | |
234b45d4 | 31 | |
234b45d4 KB |
32 | /* Return the name of the current host/target character set. The |
33 | result is owned by the charset module; the caller should not free | |
34 | it. */ | |
35 | const char *host_charset (void); | |
f870a310 TT |
36 | const char *target_charset (struct gdbarch *gdbarch); |
37 | const char *target_wide_charset (struct gdbarch *gdbarch); | |
6c7a06a3 TT |
38 | |
39 | /* These values are used to specify the type of transliteration done | |
40 | by convert_between_encodings. */ | |
41 | enum transliterations | |
42 | { | |
43 | /* Do not transliterate; a failure to convert is an error. */ | |
44 | translit_none, | |
45 | /* Transliterate to a host character. */ | |
46 | translit_char | |
47 | }; | |
48 | ||
49 | /* Convert between two encodings. | |
50 | ||
51 | FROM is the name of the source encoding. | |
52 | TO is the name of the destination encoding. | |
53 | BYTES holds the bytes to convert; this is assumed to be characters | |
54 | in the FROM encoding. | |
55 | NUM_BYTES is the number of bytes. | |
56 | WIDTH is the width of a character from the FROM charset, in bytes. | |
57 | For a variable width encoding, WIDTH should be the size of a "base | |
58 | character". | |
59 | OUTPUT is an obstack where the converted data is written. The | |
60 | caller is responsible for initializing the obstack, and for | |
61 | destroying the obstack should an error occur. | |
62 | TRANSLIT specifies how invalid conversions should be handled. */ | |
aff410f1 | 63 | |
6c7a06a3 | 64 | void convert_between_encodings (const char *from, const char *to, |
aff410f1 MS |
65 | const gdb_byte *bytes, |
66 | unsigned int num_bytes, | |
6c7a06a3 TT |
67 | int width, struct obstack *output, |
68 | enum transliterations translit); | |
69 | ||
70 | ||
71 | /* These values are used by wchar_iterate to report errors. */ | |
72 | enum wchar_iterate_result | |
73 | { | |
74 | /* Ordinary return. */ | |
75 | wchar_iterate_ok, | |
76 | /* An invalid input sequence was seen. */ | |
77 | wchar_iterate_invalid, | |
78 | /* An incomplete input sequence was seen at the end of the input. */ | |
79 | wchar_iterate_incomplete, | |
80 | /* EOF: all input bytes were successfully converted. */ | |
81 | wchar_iterate_eof | |
82 | }; | |
83 | ||
84 | /* Declaration of the opaque wchar iterator type. */ | |
85 | struct wchar_iterator; | |
86 | ||
87 | /* Create a new character iterator which returns wchar_t's. INPUT is | |
88 | the input buffer. BYTES is the number of bytes in the input | |
89 | buffer. CHARSET is the name of the character set in which INPUT is | |
90 | encoded. WIDTH is the number of bytes in a base character of | |
91 | CHARSET. | |
92 | ||
93 | This function either returns a new character set iterator, or calls | |
94 | error. The result can be freed using a cleanup; see | |
95 | make_cleanup_wchar_iterator. */ | |
aff410f1 MS |
96 | struct wchar_iterator *make_wchar_iterator (const gdb_byte *input, |
97 | size_t bytes, | |
6c7a06a3 TT |
98 | const char *charset, |
99 | size_t width); | |
100 | ||
101 | /* Return a new cleanup suitable for destroying the wchar iterator | |
102 | ITER. */ | |
103 | struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter); | |
104 | ||
105 | /* Perform a single iteration of a wchar_t iterator. | |
106 | ||
107 | Returns the number of characters converted. A negative result | |
108 | means that EOF has been reached. A positive result indicates the | |
109 | number of valid wchar_ts in the result; *OUT_CHARS is updated to | |
110 | point to the first valid character. | |
111 | ||
112 | In all cases aside from EOF, *PTR is set to point to the first | |
113 | converted target byte. *LEN is set to the number of bytes | |
114 | converted. | |
115 | ||
116 | A zero result means one of several unusual results. *OUT_RESULT is | |
117 | set to indicate the type of non-ordinary return. | |
118 | ||
119 | wchar_iterate_invalid means that an invalid input character was | |
120 | seen. The iterator is advanced by WIDTH (the argument to | |
121 | make_wchar_iterator) bytes. | |
122 | ||
123 | wchar_iterate_incomplete means that an incomplete character was | |
124 | seen at the end of the input sequence. | |
125 | ||
126 | wchar_iterate_eof means that all bytes were successfully | |
127 | converted. The other output arguments are not set. */ | |
128 | int wchar_iterate (struct wchar_iterator *iter, | |
129 | enum wchar_iterate_result *out_result, | |
130 | gdb_wchar_t **out_chars, | |
131 | const gdb_byte **ptr, size_t *len); | |
132 | ||
133 | \f | |
134 | ||
135 | /* GDB needs to know a few details of its execution character set. | |
136 | This knowledge is isolated here and in charset.c. */ | |
137 | ||
138 | /* The escape character. */ | |
139 | #define HOST_ESCAPE_CHAR 27 | |
140 | ||
141 | /* Convert a letter, like 'c', to its corresponding control | |
142 | character. */ | |
143 | char host_letter_to_control_character (char c); | |
144 | ||
145 | /* Convert a hex digit character to its numeric value. E.g., 'f' is | |
146 | converted to 15. This function assumes that C is a valid hex | |
147 | digit. Both upper- and lower-case letters are recognized. */ | |
148 | int host_hex_value (char c); | |
234b45d4 KB |
149 | |
150 | #endif /* CHARSET_H */ |