2009-10-12 Tristan Gingold <gingold@adacore.com>
[deliverable/binutils-gdb.git] / readline / mbutil.c
CommitLineData
9255ee31
EZ
/* mbutil.c -- readline multibyte character utility functions */

/* Copyright (C) 2001-2005 Free Software Foundation, Inc.

   This file is part of the GNU Readline Library, a library for
   reading lines of text with interactive input and history editing.

   The GNU Readline Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2, or
   (at your option) any later version.

   The GNU Readline Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   The GNU General Public License is often shipped with GNU software, and
   is generally kept in a file called COPYING or LICENSE.  If you do not
   have a copy of the license, write to the Free Software Foundation,
   59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22#define READLINE_LIBRARY
23
24#if defined (HAVE_CONFIG_H)
25# include <config.h>
26#endif
27
28#include <sys/types.h>
29#include <fcntl.h>
30#include "posixjmp.h"
31
32#if defined (HAVE_UNISTD_H)
33# include <unistd.h> /* for _POSIX_VERSION */
34#endif /* HAVE_UNISTD_H */
35
36#if defined (HAVE_STDLIB_H)
37# include <stdlib.h>
38#else
39# include "ansi_stdlib.h"
40#endif /* HAVE_STDLIB_H */
41
42#include <stdio.h>
43#include <ctype.h>
44
45/* System-specific feature definitions and include files. */
46#include "rldefs.h"
47#include "rlmbutil.h"
48
49#if defined (TIOCSTAT_IN_SYS_IOCTL)
50# include <sys/ioctl.h>
51#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
53/* Some standard library routines. */
54#include "readline.h"
55
56#include "rlprivate.h"
57#include "xmalloc.h"
58
/* Nonzero when the line buffer is to be handled one byte per character.
   Defined here so it can be shared between the readline and history
   libraries.  It starts out as 0 when the library is built with
   HANDLE_MULTIBYTE and as 1 otherwise. */
#if defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 0;
#else
int rl_byte_oriented = 1;
#endif
66
/* **************************************************************** */
/*                                                                  */
/*            Multibyte Character Utility Functions                 */
/*                                                                  */
/* **************************************************************** */
72
73#if defined(HANDLE_MULTIBYTE)
74
/* Advance COUNT characters through STRING, starting at byte offset SEED,
   and return the byte offset reached.

   A negative SEED is treated as 0, and SEED is returned unchanged when
   COUNT is not positive.  If SEED falls inside a multibyte character,
   moving forward to the start of the next character uses up one of the
   COUNT steps.  A byte that does not begin a valid multibyte sequence is
   counted as one character.  When FIND_NON_ZERO is set, zero-width
   characters are stepped over without being counted, and any zero-width
   characters that follow the final position are skipped as well.

   The scan stops at the terminating NUL of STRING even if COUNT has not
   been used up.

   NOTE(review): SEED is assumed to be no larger than strlen (STRING); a
   larger value is not handled here -- confirm that callers guarantee it. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp;
  mbstate_t ps;
  int point, end;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  point = seed + _rl_adjust_point (string, seed, &ps);
  /* If this is true, SEED did not point at the first byte of a character.
     POINT has been corrected, which consumes one of the COUNT steps. */
  if (seed < point)
    count--;

  /* Offset of the terminating NUL.  Computing it once bounds every
     conversion below and keeps POINT from running off the end. */
  end = point + (int) strlen (string + point);

  while (count > 0 && point < end)
    {
      tmp = mbrtowc (&wc, string + point, (size_t) (end - point), &ps);
      if (MB_INVALIDCH (tmp))
        {
          /* Invalid or incomplete bytes: assume one byte is one character. */
          point++;
          count--;
          /* The conversion state is no longer meaningful; reset it. */
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;          /* found wide '\0' */
      else
        {
          /* Valid character.  Zero-width characters are consumed without
             being counted when FIND_NON_ZERO is set. */
          point += tmp;
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters that trail the final position.  WC is
         only examined when the conversion actually produced a character;
         a null, invalid or incomplete result ends the scan. */
      tmp = mbrtowc (&wc, string + point, (size_t) (end - point), &ps);
      while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0
             && wcwidth (wc) == 0)
        {
          point += tmp;
          tmp = mbrtowc (&wc, string + point, (size_t) (end - point), &ps);
        }
    }

  return point;
}
142
/* Return the byte offset at which the character preceding byte offset
   SEED in STRING begins.

   STRING is scanned from its start, one multibyte character at a time,
   remembering where the most recent character began, until the scan
   reaches or passes SEED.  Returns 0 when SEED is negative and
   strlen (STRING) when SEED lies beyond the end of the string.  When
   FIND_NON_ZERO is set, zero-width characters are not remembered, so the
   result is the start of the last non-zero-width character that begins
   before SEED.  A byte that does not begin a valid multibyte sequence is
   treated as a single non-zero-width character. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t ps;
  int prev, point, length;
  size_t tmp;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  length = strlen (string);

  if (seed < 0)
    return 0;
  else if (length < seed)
    return length;

  prev = point = 0;
  while (point < seed)
    {
      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (MB_INVALIDCH (tmp))
        {
          /* The bytes are invalid or too few to form a multibyte
             character, so assume the first byte is a character by
             itself. */
          tmp = 1;
          /* Clear the conversion state; its contents are undefined after
             a failed conversion. */
          memset (&ps, 0, sizeof (mbstate_t));

          /* This byte is being treated as a single non-zero-width
             character, so it is a candidate for the result. */
          prev = point;
        }
      else if (MB_NULLWCH (tmp))
        break;          /* Found '\0' char.  Can this happen? */
      else
        {
          /* Remember this character unless only non-zero-width characters
             were requested and this one has zero width. */
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            prev = point;
        }

      point += tmp;
    }

  return prev;
}
197
/* Return the number of bytes in the multibyte character that starts at
   SRC, looking no further than the terminating NUL of SRC.

   The result is the positive byte count when a non-null character is
   recognized, 0 when mbrlen reports a null wide character, -1 when the
   bytes form an invalid multibyte sequence, and -2 when they do not form
   a complete character (this includes an empty SRC, since no bytes are
   examined).  On -1 and -2 the conversion state *PS is reset to the
   initial state if PS is non-null. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t tmp;

  tmp = mbrlen ((const char *)src, strlen (src), ps);
  if (tmp == (size_t)(-2))
    {
      /* Too few bytes to compose a multibyte character. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return -2;
    }
  else if (tmp == (size_t)(-1))
    {
      /* Invalid multibyte sequence; reinitialize the conversion state. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return -1;
    }
  else if (tmp == (size_t)0)
    return 0;
  else
    return (int)tmp;
}
231
/* Compare the multibyte character starting at BUF1[POS1] with the one
   starting at BUF2[POS2].  Return 1 if both are recognized as non-null
   characters of the same byte length whose bytes are all equal;
   otherwise return 0.  PS1 and PS2 are the conversion states handed to
   _rl_get_char_len for the respective buffers. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int i, w1, w2;

  /* A non-positive length means a null, invalid or incomplete character,
     which never matches. */
  if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 ||
      (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 ||
      (w1 != w2) ||
      (buf1[pos1] != buf2[pos2]))
    return 0;

  /* The first byte is already known to match; check the remaining ones. */
  for (i = 1; i < w1; i++)
    if (buf1[pos1+i] != buf2[pos2+i])
      return 0;

  return 1;
}
257
/* Find the first character boundary in STRING at or after byte offset
   POINT, walking from the start of STRING and updating the conversion
   state *PS along the way.  Return the number of bytes POINT must be
   moved forward to reach that boundary (0 if POINT already lies on one).
   Return -1 if POINT is invalid, that is, negative or greater than the
   length of STRING.  A byte that does not begin a valid multibyte
   sequence is treated as one character, and *PS is reset when that
   happens if PS is non-null. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t tmp = 0;
  int length;
  int pos = 0;

  length = strlen (string);
  if (point < 0)
    return -1;
  if (length < point)
    return -1;

  while (pos < point)
    {
      tmp = mbrlen (string + pos, length - pos, ps);
      if (MB_INVALIDCH (tmp))
        {
          /* The bytes are invalid or too few to form a multibyte
             character, so assume the first byte is a character by
             itself. */
          pos++;
          /* Clear the conversion state; its contents are undefined after
             a failed conversion. */
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        pos++;
      else
        pos += tmp;
    }

  return (pos - point);
}
301
/* Return 1 if the LENGTH bytes of MBCHAR appear in STRING starting at
   byte offset SEED, and 0 otherwise.  END is the offset just past the
   usable part of STRING; if fewer than LENGTH bytes lie between SEED and
   END the result is 0 without any bytes being compared. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int i;

  if ((end - seed) < length)
    return 0;

  for (i = 0; i < length; i++)
    if (string[seed + i] != mbchar[i])
      return 0;
  return 1;
}
5bdf8622
DJ
319
320wchar_t
321_rl_char_value (buf, ind)
322 char *buf;
323 int ind;
324{
325 size_t tmp;
326 wchar_t wc;
327 mbstate_t ps;
328 int l;
329
330 if (MB_LEN_MAX == 1 || rl_byte_oriented)
331 return ((wchar_t) buf[ind]);
332 l = strlen (buf);
333 if (ind >= l - 1)
334 return ((wchar_t) buf[ind]);
335 memset (&ps, 0, sizeof (mbstate_t));
336 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
337 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
338 return ((wchar_t) buf[ind]);
339 return wc;
340}
9255ee31
EZ
341#endif /* HANDLE_MULTIBYTE */
342
/* Return the byte offset reached by moving COUNT characters forward in
   STRING from byte offset SEED.  When built with HANDLE_MULTIBYTE the
   work is done by _rl_find_next_mbchar_internal, and FLAGS is passed
   through as its find_non_zero argument (if FLAGS is MB_FIND_NONZERO,
   only non-zero-width multibyte characters are counted).  Without
   HANDLE_MULTIBYTE every byte is a character, STRING and FLAGS are
   ignored, and the result is simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
#if defined (HANDLE_MULTIBYTE)
  return _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  return (seed + count);
#endif
}
358
/* Return the byte offset at which the character preceding byte offset
   SEED in STRING begins; the result is never greater than SEED.  When
   built with HANDLE_MULTIBYTE the work is done by
   _rl_find_prev_mbchar_internal, and FLAGS is passed through as its
   find_non_zero argument (if FLAGS is MB_FIND_NONZERO, zero-width
   multibyte characters are passed over).  Without HANDLE_MULTIBYTE every
   byte is a character, STRING and FLAGS are ignored, and the result is
   SEED - 1, or SEED itself when SEED is 0. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
#if defined (HANDLE_MULTIBYTE)
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  return ((seed == 0) ? seed : seed - 1);
#endif
}
This page took 0.30964 seconds and 4 git commands to generate.