Commit | Line | Data |
---|---|---|
234b45d4 KB |
1 | /* Character set conversion support for GDB. |
2 | Copyright 2001 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of GDB. | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | This program is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with this program; if not, write to the Free Software | |
18 | Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | #ifndef CHARSET_H | |
22 | #define CHARSET_H | |
23 | ||
24 | ||
25 | /* If the target program uses a different character set than the host, | |
26 | GDB has some support for translating between the two; GDB converts | |
27 | characters and strings to the host character set before displaying | |
28 | them, and converts characters and strings appearing in expressions | |
29 | entered by the user to the target character set. | |
30 | ||
31 | At the moment, GDB only supports single-byte, stateless character | |
32 | sets. This includes the ISO-8859 family (ASCII extended with | |
33 | accented characters, and (I think) Cyrillic, for European | |
34 | languages), and the EBCDIC family (used on IBM's mainframes). | |
35 | Unfortunately, it excludes many Asian scripts, the fixed- and | |
36 | variable-width Unicode encodings, and other desirable things. | |
37 | Patches are welcome! (For example, it would be nice if the Java | |
38 | string support could simply get absorbed into some more general | |
39 | multi-byte encoding support.) | |
40 | ||
41 | Furthermore, GDB's code pretty much assumes that the host character | |
42 | set is some superset of ASCII; there are plenty of ('0' + n) | |
43 | expressions and the like. | |
44 | ||
45 | When the `iconv' library routine supports a character set meeting | |
46 | the requirements above, it's easy to plug an entry into GDB's table | |
47 | that uses iconv to handle the details. */ | |
48 | ||
234b45d4 KB |
49 | /* Return the name of the current host/target character set. The |
50 | result is owned by the charset module; the caller should not free | |
51 | it. */ | |
52 | const char *host_charset (void); | |
53 | const char *target_charset (void); | |
54 | ||
234b45d4 KB |
55 | /* In general, the set of C backslash escapes (\n, \f) is specific to |
56 | the character set. Not all character sets will have form feed | |
57 | characters, for example. | |
58 | ||
59 | The following functions allow GDB to parse and print control | |
60 | characters in a character-set-independent way. They are both | |
61 | language-specific (to C and C++) and character-set-specific. | |
62 | Putting them here is a compromise. */ | |
63 | ||
64 | ||
65 | /* If the target character TARGET_CHAR has a backslash escape in the | |
66 | C language (i.e., a character like 'n' or 't'), return the host | |
67 | character string that should follow the backslash. Otherwise, | |
68 | return zero. | |
69 | ||
70 | When this function returns non-zero, the string it returns is | |
71 | statically allocated; the caller is not responsible for freeing it. */ | |
72 | const char *c_target_char_has_backslash_escape (int target_char); | |
73 | ||
74 | ||
75 | /* If the host character HOST_CHAR is a valid backslash escape in the | |
76 | C language for the target character set, return non-zero, and set | |
77 | *TARGET_CHAR to the target character the backslash escape represents. | |
78 | Otherwise, return zero. */ | |
79 | int c_parse_backslash (int host_char, int *target_char); | |
80 | ||
81 | ||
82 | /* Return non-zero if the host character HOST_CHAR can be printed | |
83 | literally --- that is, if it can be readably printed as itself in a | |
84 | character or string constant. Return zero if it should be printed | |
85 | using some kind of numeric escape, like '\031' in C, '^(25)' in | |
86 | Chill, or #25 in Pascal. */ | |
87 | int host_char_print_literally (int host_char); | |
88 | ||
89 | ||
90 | /* If the host character HOST_CHAR has an equivalent in the target | |
91 | character set, set *TARGET_CHAR to that equivalent, and return | |
92 | non-zero. Otherwise, return zero. */ | |
93 | int host_char_to_target (int host_char, int *target_char); | |
94 | ||
95 | ||
96 | /* If the target character TARGET_CHAR has an equivalent in the host | |
97 | character set, set *HOST_CHAR to that equivalent, and return | |
98 | non-zero. Otherwise, return zero. */ | |
99 | int target_char_to_host (int target_char, int *host_char); | |
100 | ||
101 | ||
102 | /* If the target character TARGET_CHAR has a corresponding control | |
103 | character (also in the target character set), set *TARGET_CTRL_CHAR | |
104 | to the control character, and return non-zero. Otherwise, return | |
105 | zero. */ | |
106 | int target_char_to_control_char (int target_char, int *target_ctrl_char); | |
107 | ||
108 | ||
109 | #endif /* CHARSET_H */ |