Import readline 7.0 (patch 5)
[deliverable/binutils-gdb.git] / readline / mbutil.c
CommitLineData
9255ee31
EZ
1/* mbutil.c -- readline multibyte character utility functions */
2
775e241e 3/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
9255ee31 4
cc88a640
JK
5 This file is part of the GNU Readline Library (Readline), a library
6 for reading lines of text with interactive input and history editing.
9255ee31 7
cc88a640
JK
8 Readline is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
9255ee31
EZ
11 (at your option) any later version.
12
cc88a640
JK
13 Readline is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9255ee31
EZ
16 GNU General Public License for more details.
17
cc88a640
JK
18 You should have received a copy of the GNU General Public License
19 along with Readline. If not, see <http://www.gnu.org/licenses/>.
20*/
21
9255ee31
EZ
22#define READLINE_LIBRARY
23
24#if defined (HAVE_CONFIG_H)
25# include <config.h>
26#endif
27
28#include <sys/types.h>
29#include <fcntl.h>
30#include "posixjmp.h"
31
32#if defined (HAVE_UNISTD_H)
33# include <unistd.h> /* for _POSIX_VERSION */
34#endif /* HAVE_UNISTD_H */
35
36#if defined (HAVE_STDLIB_H)
37# include <stdlib.h>
38#else
39# include "ansi_stdlib.h"
40#endif /* HAVE_STDLIB_H */
41
42#include <stdio.h>
43#include <ctype.h>
44
45/* System-specific feature definitions and include files. */
46#include "rldefs.h"
47#include "rlmbutil.h"
48
49#if defined (TIOCSTAT_IN_SYS_IOCTL)
50# include <sys/ioctl.h>
51#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
53/* Some standard library routines. */
54#include "readline.h"
55
56#include "rlprivate.h"
57#include "xmalloc.h"
58
/* Defined in this file so that both the readline and history libraries
   can share it.  It starts out as 0 when multibyte support
   (HANDLE_MULTIBYTE) is compiled in and as 1 when it is not. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif

/* Likewise defined here so both libraries can share it; initially 0. */
int _rl_utf8locale = 0;
69
9255ee31
EZ
70/* **************************************************************** */
71/* */
72/* Multibyte Character Utility Functions */
73/* */
74/* **************************************************************** */
75
76#if defined(HANDLE_MULTIBYTE)
77
/* Return the byte offset reached by advancing COUNT characters in STRING
   starting from byte offset SEED.  A negative SEED is treated as 0, and
   SEED itself is returned when COUNT is not positive.  If SEED falls in
   the middle of a multibyte character, moving to the start of the next
   character counts as one of the COUNT steps.  Bytes that do not form a
   valid multibyte character are each counted as one character.  When
   FIND_NON_ZERO is nonzero, zero-width characters are not counted, and
   zero-width characters following the final position are skipped too.
   If SEED lies beyond the end of STRING, SEED + 1 is returned. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, len;
  mbstate_t ps;
  int point, adjust;
  wchar_t wc;

  tmp = 0;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  /* _rl_adjust_point returns -1 when SEED is greater than the length of
     STRING.  Adding that to SEED would produce an offset that can lie past
     the terminating null, and the strlen/mbrtowc calls below would then
     read outside the buffer.  Treat the position as a single byte. */
  adjust = _rl_adjust_point (string, seed, &ps);
  if (adjust < 0)
    return seed + 1;
  point = seed + adjust;

  /* A positive adjustment means SEED was not pointing at the first byte
     of a multibyte character; moving to the next boundary consumes one
     of the requested characters. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      len = strlen (string + point);
      if (len == 0)
        break;
      tmp = mbrtowc (&wc, string + point, len, &ps);
      if (MB_INVALIDCH ((size_t)tmp))
        {
          /* Invalid or incomplete sequence: count this byte as one
             character and restart from a clean conversion state. */
          point++;
          count--;
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* found wide '\0' */
      else
        {
          /* Valid character: step over it.  Zero-width characters are
             not counted when FIND_NON_ZERO is set. */
          point += tmp;
          if (find_non_zero == 0 || WCWIDTH (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Also step over any zero-width characters that follow POINT. */
      tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
      while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && WCWIDTH (wc) == 0)
        {
          point += tmp;
          tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
        }
    }

  return point;
}
147
/* Return the byte offset in STRING at which the last character that
   begins before byte offset SEED starts, or 0 if there is none.  Returns
   0 when SEED is negative and the length of STRING when SEED is beyond
   it.  Bytes that do not form a valid multibyte character are each
   treated as one character.  When FIND_NON_ZERO is nonzero, zero-width
   characters are not recorded as candidate positions. */
/*static*/ int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;
  int slen, cur, last;

  slen = strlen (string);
  if (seed < 0)
    return 0;
  if (seed > slen)
    return slen;

  memset (&state, 0, sizeof (mbstate_t));
  last = 0;

  /* Scan forward from the start, remembering where the most recent
     acceptable character began. */
  for (cur = 0; cur < seed; cur += nbytes)
    {
      nbytes = mbrtowc (&wch, string + cur, slen - cur, &state);
      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* Invalid or incomplete sequence: treat the first byte as a
             character of its own, remember it, and reset the conversion
             state, which is undefined after such a failure. */
          nbytes = 1;
          memset (&state, 0, sizeof (mbstate_t));
          last = cur;
        }
      else if (MB_NULLWCH (nbytes))
        break;                  /* null wide character: stop scanning */
      else if (find_non_zero == 0 || WCWIDTH (wch) != 0)
        last = cur;
    }

  return last;
}
202
/* Measure the multibyte character at the start of SRC with mbrlen, using
   conversion state PS and examining at most strlen (SRC) bytes.  The
   mbrlen result is returned as an int: the number of bytes in the
   character, 0 for a null wide character, -1 for an invalid sequence, or
   -2 for an incomplete one.  In the -1 and -2 cases *PS is reset to the
   initial state when PS is not null. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;

  nbytes = mbrlen ((const char *)src, (size_t)strlen (src), ps);

  if (nbytes == (size_t)(-2) || nbytes == (size_t)(-1))
    {
      /* The sequence is incomplete or invalid; start again from a clean
         conversion state. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (nbytes == (size_t)(-2)) ? -2 : -1;
    }

  return (int)nbytes;
}
236
/* Compare the multibyte character at BUF1[POS1] with the one at
   BUF2[POS2].  Each is measured with _rl_get_char_len, using PS1 and PS2
   respectively.  Return 1 when both have the same positive byte length
   and identical bytes; otherwise return 0. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  char *c1, *c2;
  int n1, n2, k;

  c1 = buf1 + pos1;
  c2 = buf2 + pos2;

  /* The second character is only measured if the first one is usable. */
  n1 = _rl_get_char_len (c1, ps1);
  if (n1 <= 0)
    return 0;

  n2 = _rl_get_char_len (c2, ps2);
  if (n2 <= 0 || n1 != n2)
    return 0;

  for (k = 0; k < n1; k++)
    {
      if (c1[k] != c2[k])
        return 0;
    }

  return 1;
}
262
/* Walk STRING from its start one multibyte character at a time, updating
   the conversion state PS, until byte offset POINT has been reached or
   passed.  Return how many bytes past POINT the walk ended, which is 0
   when POINT lies on a character boundary.  Return -1 when POINT is
   invalid, i.e. negative or greater than the length of STRING. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t step;
  int slen, cur;

  slen = strlen (string);
  if (point < 0 || point > slen)
    return -1;

  for (cur = 0; cur < point; )
    {
      step = mbrlen (string + cur, slen - cur, ps);
      if (MB_INVALIDCH ((size_t)step))
        {
          /* Invalid or incomplete sequence: treat the first byte as a
             character of its own and reset the conversion state, which
             is undefined after such a failure. */
          cur++;
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (step))
        cur++;
      else
        cur += step;
    }

  return (cur - point);
}
306
/* Return 1 if the LENGTH bytes of MBCHAR are found in STRING starting at
   byte offset SEED and the range from SEED to END is at least LENGTH bytes
   long; otherwise return 0. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int k;

  /* Not enough room between SEED and END to hold MBCHAR. */
  if (length > end - seed)
    return 0;

  k = 0;
  while (k < length && string[seed + k] == mbchar[k])
    k++;

  /* Every byte matched only if the scan ran to completion. */
  return (k >= length);
}
5bdf8622
DJ
324
325wchar_t
326_rl_char_value (buf, ind)
327 char *buf;
328 int ind;
329{
330 size_t tmp;
331 wchar_t wc;
332 mbstate_t ps;
333 int l;
334
335 if (MB_LEN_MAX == 1 || rl_byte_oriented)
336 return ((wchar_t) buf[ind]);
337 l = strlen (buf);
338 if (ind >= l - 1)
339 return ((wchar_t) buf[ind]);
340 memset (&ps, 0, sizeof (mbstate_t));
341 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
342 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
343 return ((wchar_t) buf[ind]);
344 return wc;
345}
9255ee31
EZ
346#endif /* HANDLE_MULTIBYTE */
347
/* Return the byte offset in STRING reached by moving COUNT characters
   forward from byte offset SEED.  With multibyte support compiled in,
   this defers to _rl_find_next_mbchar_internal, passing FLAGS along
   (MB_FIND_NONZERO asks for non-zero-width multibyte characters).
   Without it, every character is one byte and the result is simply
   SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
#if !defined (HANDLE_MULTIBYTE)
  return (seed + count);
#else
  return _rl_find_next_mbchar_internal (string, seed, count, flags);
#endif
}
363
/* Return the byte offset in STRING of the character preceding byte offset
   SEED; the result is never greater than SEED.  With multibyte support
   compiled in, this defers to _rl_find_prev_mbchar_internal, passing
   FLAGS along (MB_FIND_NONZERO asks for non-zero-width multibyte
   characters).  Without it, the result is SEED - 1, or SEED itself when
   SEED is 0. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
#if !defined (HANDLE_MULTIBYTE)
  return ((seed != 0) ? seed - 1 : seed);
#else
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#endif
}
This page took 0.721116 seconds and 4 git commands to generate.