readline/
[deliverable/binutils-gdb.git] / readline / mbutil.c
CommitLineData
9255ee31
EZ
1/* mbutil.c -- readline multibyte character utility functions */
2
cc88a640 3/* Copyright (C) 2001-2009 Free Software Foundation, Inc.
9255ee31 4
cc88a640
JK
5 This file is part of the GNU Readline Library (Readline), a library
6 for reading lines of text with interactive input and history editing.
9255ee31 7
cc88a640
JK
8 Readline is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
9255ee31
EZ
11 (at your option) any later version.
12
cc88a640
JK
13 Readline is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9255ee31
EZ
16 GNU General Public License for more details.
17
cc88a640
JK
18 You should have received a copy of the GNU General Public License
19 along with Readline. If not, see <http://www.gnu.org/licenses/>.
20*/
21
9255ee31
EZ
22#define READLINE_LIBRARY
23
24#if defined (HAVE_CONFIG_H)
25# include <config.h>
26#endif
27
28#include <sys/types.h>
29#include <fcntl.h>
30#include "posixjmp.h"
31
32#if defined (HAVE_UNISTD_H)
33# include <unistd.h> /* for _POSIX_VERSION */
34#endif /* HAVE_UNISTD_H */
35
36#if defined (HAVE_STDLIB_H)
37# include <stdlib.h>
38#else
39# include "ansi_stdlib.h"
40#endif /* HAVE_STDLIB_H */
41
42#include <stdio.h>
43#include <ctype.h>
44
45/* System-specific feature definitions and include files. */
46#include "rldefs.h"
47#include "rlmbutil.h"
48
49#if defined (TIOCSTAT_IN_SYS_IOCTL)
50# include <sys/ioctl.h>
51#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
53/* Some standard library routines. */
54#include "readline.h"
55
56#include "rlprivate.h"
57#include "xmalloc.h"
58
/* rl_byte_oriented is defined in this file so that a single definition can
   be shared by the readline and history libraries.  Its initial value is 0
   when the library is built with HANDLE_MULTIBYTE and 1 otherwise. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif
66
67/* **************************************************************** */
68/* */
69/* Multibyte Character Utility Functions */
70/* */
71/* **************************************************************** */
72
73#if defined(HANDLE_MULTIBYTE)
74
/* Return the byte offset reached by moving forward COUNT characters in
   STRING, starting from byte offset SEED.

   A negative SEED is treated as 0, and SEED (after that clamp) is returned
   unchanged when COUNT is not positive.  If SEED does not fall on the first
   byte of a multibyte character, moving up to the start of the following
   character counts as the first step.  Each byte that cannot be decoded as
   part of a valid multibyte character is counted as one character.

   When FIND_NON_ZERO is non-zero, characters whose wcwidth() is 0 are
   stepped over without being counted, and any such characters that
   immediately follow the final position are skipped as well.

   If SEED lies beyond the end of STRING, SEED + 1 is returned and STRING is
   not examined at that offset. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, len;
  mbstate_t ps;
  int point, adjust;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  /* _rl_adjust_point returns -1 when SEED is past the end of STRING.  Using
     that value as an offset would leave POINT at SEED - 1, which may itself
     be past the terminating NUL, and the strlen/mbrtowc calls below would
     then read outside the string.  Treat the position as a single byte
     instead. */
  adjust = _rl_adjust_point (string, seed, &ps);
  if (adjust < 0)
    return seed + 1;
  point = seed + adjust;

  /* If this is true, SEED was not pointing at the first byte of a multibyte
     character.  POINT has been corrected; consume one character for it. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      len = strlen (string + point);
      if (len == 0)
        break;
      tmp = mbrtowc (&wc, string + point, len, &ps);
      if (MB_INVALIDCH (tmp))
        {
          /* Invalid bytes: assume one byte represents one character, and
             reset the conversion state. */
          point++;
          count--;
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* found wide '\0' */
      else
        {
          /* Valid bytes.  Zero-width characters do not use up COUNT when
             the caller asked for non-zero-width characters only. */
          point += tmp;
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Step over any zero-width characters that follow the position we
         stopped at. */
      tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
      while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && wcwidth (wc) == 0)
        {
          point += tmp;
          tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
        }
    }

  return point;
}
144
/* Return the byte offset at which the character preceding byte offset SEED
   in STRING begins.  Returns 0 when SEED is negative and the length of
   STRING when SEED is beyond it.  STRING is scanned from its start; when
   FIND_NON_ZERO is non-zero, only characters whose wcwidth() is not 0 are
   remembered as candidate starting points. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t state;
  int last_start, offset, total;
  size_t nbytes;
  wchar_t wch;

  memset (&state, 0, sizeof (mbstate_t));
  total = strlen (string);

  if (seed < 0)
    return 0;
  if (total < seed)
    return total;

  last_start = 0;
  for (offset = 0; offset < seed; offset += nbytes)
    {
      nbytes = mbrtowc (&wch, string + offset, total - offset, &state);
      if (MB_INVALIDCH (nbytes))
        {
          /* The bytes are invalid or too short to form a multibyte
             character, so assume the first byte stands for a single
             character anyway. */
          nbytes = 1;
          /* The conversion state is undefined after such a failure;
             clear it. */
          memset (&state, 0, sizeof (mbstate_t));

          /* That byte is being treated as one non-zero-width character,
             so remember where it starts. */
          last_start = offset;
        }
      else if (MB_NULLWCH (nbytes))
        break;                  /* Found '\0' char.  Can this happen? */
      else if (find_non_zero == 0 || wcwidth (wch) != 0)
        last_start = offset;
    }

  return last_start;
}
199
/* Measure the multibyte character that starts at SRC using mbrlen(), with
   conversion state PS.  Returns the number of bytes in the character when a
   non-null character is recognized, 0 when a null character is recognized,
   -1 when the byte sequence is invalid, and -2 when the bytes available do
   not form a complete character.  In the -1 and -2 cases *PS is cleared if
   PS is not a null pointer. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;

  nbytes = mbrlen ((const char *)src, (size_t)strlen (src), ps);

  if (nbytes == (size_t)(-2) || nbytes == (size_t)(-1))
    {
      /* Too short or invalid to compose a multibyte character:
         reinitialize the conversion state before reporting the error. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (nbytes == (size_t)(-2)) ? -2 : -1;
    }

  return (int)nbytes;
}
233
/* Compare the character at byte offset POS1 of BUF1 with the character at
   byte offset POS2 of BUF2, using conversion states PS1 and PS2 to measure
   them.  Return 1 if both characters have the same positive byte length and
   identical bytes; otherwise return 0. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int idx, len1, len2;

  /* The second character is measured only if the first one is usable. */
  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0)
    return 0;

  if (len1 != len2)
    return 0;

  for (idx = 0; idx < len1; idx++)
    {
      if (buf1[pos1 + idx] != buf2[pos2 + idx])
        return 0;
    }

  return 1;
}
259
/* Walk STRING from its beginning, one multibyte character at a time, until
   reaching or passing byte offset POINT, updating the conversion state PS
   along the way.  The position reached (the adjusted point) satisfies
   POINT <= adjusted point, and the return value is the difference in bytes
   (adjusted point - POINT).  If POINT is invalid (negative or greater than
   the length of STRING), return -1. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t nbytes;
  int total, offset;

  total = strlen (string);
  if (point < 0 || total < point)
    return -1;

  for (offset = 0; offset < point; )
    {
      nbytes = mbrlen (string + offset, total - offset, ps);
      if (MB_INVALIDCH (nbytes))
        {
          /* The conversion state is undefined after a failed conversion,
             so clear it. */
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
          /* The bytes are invalid or too short to form a multibyte
             character; assume the first byte stands for a single
             character anyway. */
          offset++;
        }
      else if (MB_NULLWCH (nbytes))
        offset++;
      else
        offset += nbytes;
    }

  return (offset - point);
}
303
/* Return 1 if the LENGTH bytes of MBCHAR are identical to the LENGTH bytes
   of STRING starting at byte offset SEED, and 0 otherwise.  Also returns 0,
   without comparing anything, when fewer than LENGTH bytes lie between
   SEED and END. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int offset;

  if (length > (end - seed))
    return 0;

  offset = 0;
  while (offset < length)
    {
      if (string[seed + offset] != mbchar[offset])
        return 0;
      offset++;
    }

  return 1;
}
5bdf8622
DJ
321
322wchar_t
323_rl_char_value (buf, ind)
324 char *buf;
325 int ind;
326{
327 size_t tmp;
328 wchar_t wc;
329 mbstate_t ps;
330 int l;
331
332 if (MB_LEN_MAX == 1 || rl_byte_oriented)
333 return ((wchar_t) buf[ind]);
334 l = strlen (buf);
335 if (ind >= l - 1)
336 return ((wchar_t) buf[ind]);
337 memset (&ps, 0, sizeof (mbstate_t));
338 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
339 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
340 return ((wchar_t) buf[ind]);
341 return wc;
342}
9255ee31
EZ
343#endif /* HANDLE_MULTIBYTE */
344
/* Return the byte offset in STRING found by moving forward COUNT characters
   from the byte offset SEED.  If FLAGS is MB_FIND_NONZERO, we look for
   non-zero-width multibyte characters.  Without HANDLE_MULTIBYTE every
   character is one byte, so the result is simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
  int next;

#if !defined (HANDLE_MULTIBYTE)
  next = seed + count;
#else
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#endif

  return next;
}
360
/* Return the byte offset in STRING at which the character before byte
   offset SEED starts; the returned point will be <= SEED.  If FLAGS is
   MB_FIND_NONZERO, we look for non-zero-width multibyte characters.
   Without HANDLE_MULTIBYTE the result is SEED - 1, except that a SEED of 0
   is returned unchanged. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
  int prev;

#if !defined (HANDLE_MULTIBYTE)
  prev = seed;
  if (prev != 0)
    prev--;
#else
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#endif

  return prev;
}
This page took 0.639011 seconds and 4 git commands to generate.