[deliverable/binutils-gdb.git] / readline / readline / mbutil.c

/* mbutil.c -- readline multibyte character utility functions */

/* Copyright (C) 2001-2020 Free Software Foundation, Inc.

   This file is part of the GNU Readline Library (Readline), a library
   for reading lines of text with interactive input and history editing.      

   Readline is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Readline is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Readline.  If not, see <http://www.gnu.org/licenses/>.
*/

#define READLINE_LIBRARY

#if defined (HAVE_CONFIG_H)
#  include <config.h>
#endif

#include <sys/types.h>
#include <fcntl.h>
#include "posixjmp.h"

#if defined (HAVE_UNISTD_H)
#  include <unistd.h>	   /* for _POSIX_VERSION */
#endif /* HAVE_UNISTD_H */

#if defined (HAVE_STDLIB_H)
#  include <stdlib.h>
#else
#  include "ansi_stdlib.h"
#endif /* HAVE_STDLIB_H */

#include <stdio.h>
#include <ctype.h>

/* System-specific feature definitions and include files. */
#include "rldefs.h"
#include "rlmbutil.h"

#if defined (TIOCSTAT_IN_SYS_IOCTL)
#  include <sys/ioctl.h>
#endif /* TIOCSTAT_IN_SYS_IOCTL */

/* Some standard library routines. */
#include "readline.h"

#include "rlprivate.h"
#include "xmalloc.h"

/* Defined in this file so that a single definition can be shared between
   the readline and history libraries.  The initial value depends on
   whether multibyte support was compiled in: 1 without HANDLE_MULTIBYTE,
   0 with it. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif

/* Shared in the same way.  When non-zero, the functions in this file take
   their UTF-8 specific shortcuts.  Starts out as 0. */
int _rl_utf8locale = 0;

/* **************************************************************** */
/*								    */
/*		Multibyte Character Utility Functions		    */
/*								    */
/* **************************************************************** */

#if defined(HANDLE_MULTIBYTE)

/* **************************************************************** */
/*								    */
/*		UTF-8 specific Character Utility Functions	    */
/*								    */
/* **************************************************************** */

/* Return the length in bytes of the possibly-multibyte character beginning
   at S, looking at no more than N bytes.  Encoding is UTF-8.

   Return values:
     0      S is a null pointer (UTF-8 has no shift states) or *S is NUL
     1..4   the number of bytes in the well-formed character at S
     -2     the N bytes available are a prefix of a character that could
	    still turn out to be valid (more input is needed)
     -1     N is 0, or the bytes at S are not a well-formed character

   No byte at or beyond S[N] is ever read. */
static int
_rl_utf8_mblen (const char *s, size_t n)
{
  unsigned char c, c1, c2, c3;

  if (s == 0)
    return (0);	/* no shift states */
  if (n == 0)
    return (-1);

  c = (unsigned char)*s;
  if (c < 0x80)
    return (c != 0);

  /* 0x80..0xc1 are continuation bytes or overlong two-byte leads and
     0xf5..0xff would encode values above U+10FFFF; none can start a
     character. */
  if (c >= 0xc2 && c <= 0xf4)
    {
      /* Every remaining form needs a second byte; make sure it is there
	 before looking at it. */
      if (n == 1)
	return -2;
      c1 = (unsigned char)s[1];
      if ((c1 ^ 0x80) >= 0x40)
	return -1;		/* second byte is not a continuation byte */

      if (c < 0xe0)
	return 2;
      else if (c < 0xf0)
	{
	  /* Reject overlong forms (e0 80..9f) and surrogates (ed a0..bf). */
	  if ((c >= 0xe1 || c1 >= 0xa0)
		&& (c != 0xed || c1 < 0xa0))
	    {
	      if (n == 2)
		return -2;
	      c2 = (unsigned char)s[2];
	      if ((c2 ^ 0x80) < 0x40)
		return 3;
	    }
	}
      else
	{
	  /* Lead bytes f0..f4.  Reject overlong forms (f0 80..8f) and
	     anything above U+10FFFF (f4 90..bf). */
	  if ((c >= 0xf1 || c1 >= 0x90)
		&& (c < 0xf4 || c1 < 0x90))
	    {
	      if (n == 2)
		return -2;
	      c2 = (unsigned char)s[2];
	      if ((c2 ^ 0x80) < 0x40)
		{
		  if (n == 3)
		    return -2;
		  c3 = (unsigned char)s[3];
		  if ((c3 ^ 0x80) < 0x40)
		    return 4;
		}
	    }
	}
    }
  /* invalid or incomplete multibyte character */
  return -1;
}

/* Starting at byte offset SEED in STRING, move forward over COUNT
   characters and return the resulting byte offset.

   A negative SEED is treated as 0, and a COUNT that is not positive
   returns SEED unchanged.  If _rl_adjust_point rejects SEED (returns -1),
   SEED + 1 is returned.  If SEED is moved forward by _rl_adjust_point,
   that adjustment uses up one of the COUNT characters.

   Bytes that mbrtowc cannot convert are stepped over one at a time, each
   counting as a character.  When FIND_NON_ZERO is set, characters whose
   WCWIDTH is 0 are passed over without being counted, and any such
   characters immediately following the final position are skipped too. */
static int
_rl_find_next_mbchar_internal (char *string, int seed, int count, int find_non_zero)
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes, remaining;
  int pos;

  nbytes = 0;
  memset (&state, 0, sizeof (mbstate_t));

  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  pos = seed + _rl_adjust_point (string, seed, &state);
  /* _rl_adjust_point returned -1: treat the position as a single byte. */
  if (pos == seed - 1)
    return seed + 1;

  /* SEED was moved forward, so one character has already been consumed. */
  if (pos > seed)
    count--;

  while (count > 0)
    {
      remaining = strlen (string + pos);
      if (remaining == 0)
        break;

      if (_rl_utf8locale && UTF8_SINGLEBYTE(string[pos]))
        {
          nbytes = 1;
          wch = (wchar_t) string[pos];
          memset (&state, 0, sizeof (mbstate_t));
        }
      else
        nbytes = mbrtowc (&wch, string + pos, remaining, &state);

      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* Unconvertible bytes: step over one byte, count it as a
             character and start again from a clean conversion state. */
          pos++;
          count--;
          memset (&state, 0, sizeof (mbstate_t));
          continue;
        }

      if (MB_NULLWCH (nbytes))
        break;			/* found wide '\0' */

      pos += nbytes;
      /* Zero-width characters are not counted when FIND_NON_ZERO is set. */
      if (find_non_zero == 0 || WCWIDTH (wch) != 0)
        count--;
    }

  if (find_non_zero)
    {
      /* Swallow any zero-width characters that follow the final position. */
      for (;;)
        {
          nbytes = mbrtowc (&wch, string + pos, strlen (string + pos), &state);
          if (MB_NULLWCH (nbytes) || MB_INVALIDCH (nbytes) || WCWIDTH (wch) != 0)
            break;
          pos += nbytes;
        }
    }

  return pos;
}

/* Convert the multibyte character at STRING + IND, giving mbrtowc
   LEN - IND bytes to work with, and return 1 unless it is a character
   whose WCWIDTH is not positive.  Sequences that mbrtowc reports as
   invalid or incomplete, and the null wide character, are deliberately
   reported as non-zero-width (1). */
static inline int
_rl_test_nonzero (char *string, int ind, int len)
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;

  memset (&state, 0, sizeof (mbstate_t));
  nbytes = mbrtowc (&wch, string + ind, len - ind, &state);

  if (MB_INVALIDCH (nbytes))
    return 1;
  if (MB_NULLWCH (nbytes))
    return 1;
  return (WCWIDTH (wch) > 0);
}

/* Scan backward from byte offset SEED in STRING and return the offset at
   which the preceding character starts, classifying bytes only with the
   UTF8_SINGLEBYTE, UTF8_MBCHAR and UTF8_MBFIRSTCHAR macros.

   Each step looks at the byte before the current position:
     - a UTF8_SINGLEBYTE byte is returned immediately;
     - over UTF8_MBCHAR bytes we keep moving back until a byte that is not
       one (or a NUL byte, or offset 0) is reached;
     - if the byte we stop on is a UTF8_MBFIRSTCHAR byte, its offset is
       returned, unless FIND_NON_ZERO is set and _rl_test_nonzero reports
       the character as zero-width, in which case the scan resumes from
       the byte before it;
     - otherwise the sequence is treated as invalid and the offset this
       step started from is returned.
   Returns 0 when the scan runs off the front of STRING.

   Still experimental: zero-width characters need better handling. */
static int
_rl_find_prev_utf8char (char *string, int seed, int find_non_zero)
{
  unsigned char b;
  int save, prev;
  size_t len;

  /* The string length is only needed for the zero-width test. */
  len = 0;
  if (find_non_zero)
    len = RL_STRLEN (string);

  for (prev = seed - 1; prev >= 0; )
    {
      b = (unsigned char)string[prev];
      if (UTF8_SINGLEBYTE (b))
        return (prev);

      save = prev;

      /* Move back until we're not in the middle of a multibyte char */
      if (UTF8_MBCHAR (b))
        while (prev > 0)
          {
            b = (unsigned char)string[--prev];
            if (b == 0 || UTF8_MBCHAR (b) == 0)
              break;
          }

      if (UTF8_MBFIRSTCHAR (b) == 0)
        return (save);		/* invalid utf-8 multibyte sequence */

      if (find_non_zero == 0 || _rl_test_nonzero (string, prev, len))
        return (prev);

      /* valid but zero-width: keep looking further back */
      prev--;
    }

  return ((prev < 0) ? 0 : prev);
}

/* Return the byte offset in STRING at which the character preceding byte
   offset SEED starts.

   When _rl_utf8locale is set the work is handed to _rl_find_prev_utf8char
   before anything else is looked at.  Otherwise a negative SEED yields 0
   and a SEED beyond the end of STRING yields the string length; for any
   other SEED the string is converted forward from offset 0 with mbrtowc,
   remembering the start of the last character seen before SEED.  Bytes
   mbrtowc cannot convert are treated as one-byte characters.  When
   FIND_NON_ZERO is set, characters whose WCWIDTH is 0 are not remembered,
   so the result is the start of the last non-zero-width character. */
/*static*/ int
_rl_find_prev_mbchar_internal (char *string, int seed, int find_non_zero)
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;
  int result, pos, slen;

  if (_rl_utf8locale)
    return (_rl_find_prev_utf8char (string, seed, find_non_zero));

  memset (&state, 0, sizeof (mbstate_t));
  slen = strlen (string);

  if (seed < 0)
    return 0;
  if (slen < seed)
    return slen;

  result = 0;
  for (pos = 0; pos < seed; pos += nbytes)
    {
      if (_rl_utf8locale && UTF8_SINGLEBYTE(string[pos]))
        {
          nbytes = 1;
          wch = (wchar_t) string[pos];
          memset (&state, 0, sizeof (mbstate_t));
        }
      else
        nbytes = mbrtowc (&wch, string + pos, slen - pos, &state);

      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* Invalid or truncated sequence: pretend this byte is a
             complete, non-zero-width character, and reset the conversion
             state since its contents are undefined after an error. */
          nbytes = 1;
          memset (&state, 0, sizeof (mbstate_t));
          result = pos;
        }
      else if (MB_NULLWCH (nbytes))
        break;			/* Found '\0' char.  Can this happen? */
      else if (find_non_zero == 0 || WCWIDTH (wch) != 0)
        result = pos;
    }

  return result;
}

/* Return the number of bytes making up the multibyte character that
   starts at SRC, examining at most MB_CUR_MAX bytes of it.

   The result is whatever mbrlen reports, narrowed to int: a positive byte
   count for a recognized character, 0 for the null wide character, -1 for
   an invalid sequence and -2 when the bytes available do not form a
   complete character.  In the -1 and -2 cases *PS is cleared if PS is
   non-null.  In a UTF-8 locale a non-empty SRC whose first byte satisfies
   UTF8_SINGLEBYTE is answered without calling mbrlen. */
int
_rl_get_char_len (char *src, mbstate_t *ps)
{
  size_t nbytes, srclen, limit;
  int mb_cur_max;

  /* Look at no more than MB_CUR_MAX characters */
  srclen = (size_t)strlen (src);
  if (_rl_utf8locale && srclen > 0 && UTF8_SINGLEBYTE(*src))
    nbytes = (*src != 0) ? 1 : 0;
  else
    {
      mb_cur_max = MB_CUR_MAX;
      limit = (srclen < mb_cur_max) ? srclen : mb_cur_max;
      nbytes = mbrlen ((const char *)src, limit, ps);
    }

  if (nbytes == (size_t)(-2) || nbytes == (size_t)(-1))
    {
      /* Too short (-2) or invalid (-1): either way put the conversion
         state back to its initial value before reporting the failure. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (nbytes == (size_t)(-2)) ? -2 : -1;
    }

  return (int)nbytes;
}

/* Compare the character at BUF1 + POS1 with the one at BUF2 + POS2.
   Return 1 if both have the same positive length according to
   _rl_get_char_len (called with PS1 and PS2 respectively) and all of
   their bytes are equal; return 0 otherwise.  The second character is
   not measured if the first one has no positive length. */
int
_rl_compare_chars (char *buf1, int pos1, mbstate_t *ps1, char *buf2, int pos2, mbstate_t *ps2)
{
  int k, len1, len2;

  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0)
    return 0;

  if (len1 != len2)
    return 0;

  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
        return 0;
    }

  return 1;
}

/* Walk STRING character by character from offset 0 until reaching or
   passing byte offset POINT, updating the conversion state *PS through
   mbrlen along the way.  Return how many bytes past POINT the walk ended
   (0 when POINT already falls on a character boundary).  Return -1 if
   POINT is negative or greater than the length of STRING.

   Bytes that mbrlen reports as invalid or incomplete are stepped over one
   at a time, and *PS is cleared after each of them if PS is non-null. */
int
_rl_adjust_point (char *string, int point, mbstate_t *ps)
{
  size_t nbytes;
  int slen, cur;

  nbytes = 0;
  slen = strlen (string);
  if (point < 0 || slen < point)
    return -1;

  for (cur = 0; cur < point; )
    {
      if (_rl_utf8locale && UTF8_SINGLEBYTE(string[cur]))
        nbytes = 1;
      else
        nbytes = mbrlen (string + cur, slen - cur, ps);

      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* Invalid or truncated sequence: take the first byte as a
             character of its own.  The state is undefined after an
             error, so start over from the initial state. */
          cur++;
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (nbytes))
        cur++;
      else
        cur += nbytes;
    }

  return (cur - point);
}

/* Return 1 if the LENGTH bytes of MBCHAR are found in STRING starting at
   byte offset SEED, 0 if they are not.  END bounds the comparison: when
   fewer than LENGTH bytes lie between SEED and END the answer is 0 and
   nothing is compared. */
int
_rl_is_mbchar_matched (char *string, int seed, int end, char *mbchar, int length)
{
  int k;

  if (length > (end - seed))
    return 0;

  k = 0;
  while (k < length)
    {
      if (string[seed + k] != mbchar[k])
        return 0;
      k++;
    }

  return 1;
}

/* Return the wide-character value of the character starting at byte
   offset IND in BUF.

   The byte BUF[IND] itself, converted to wchar_t, is returned when
   multibyte handling is not in effect (MB_LEN_MAX is 1 or
   rl_byte_oriented is set), when the byte satisfies UTF8_SINGLEBYTE in a
   UTF-8 locale, when IND is at or beyond the last byte of BUF, or when
   mbrtowc reports an invalid or incomplete sequence or the null wide
   character.  Otherwise the value converted by mbrtowc is returned. */
wchar_t
_rl_char_value (char *buf, int ind)
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;
  int buflen;

  if (MB_LEN_MAX == 1 || rl_byte_oriented
      || (_rl_utf8locale && UTF8_SINGLEBYTE(buf[ind])))
    return ((wchar_t) buf[ind]);

  buflen = strlen (buf);
  if (ind >= buflen - 1)
    return ((wchar_t) buf[ind]);

  /* Sanity check.  NOTE(review): the test just above already returns
     whenever IND >= BUFLEN - 1, so this looks unreachable -- confirm. */
  if (buflen < ind)
    buflen = strlen (buf+ind);

  memset (&state, 0, sizeof (mbstate_t));
  nbytes = mbrtowc (&wch, buf + ind, buflen - ind, &state);

  return (MB_INVALIDCH (nbytes) || MB_NULLWCH (nbytes)) ? ((wchar_t) buf[ind]) : wch;
}
#endif /* HANDLE_MULTIBYTE */

/* Return the byte offset in STRING reached by moving COUNT characters
   forward from byte offset SEED.  With HANDLE_MULTIBYTE the work is done
   by _rl_find_next_mbchar_internal, which receives FLAGS as its
   find-non-zero argument (pass MB_FIND_NONZERO to look for non-zero-width
   characters).  Without it every byte is a character and the result is
   simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (char *string, int seed, int count, int flags)
{
  int next;

#if defined (HANDLE_MULTIBYTE)
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  next = seed + count;
#endif

  return next;
}

/* Return the byte offset in STRING at which the character before byte
   offset SEED starts.  With HANDLE_MULTIBYTE the work is done by
   _rl_find_prev_mbchar_internal, which receives FLAGS as its
   find-non-zero argument (pass MB_FIND_NONZERO to look for non-zero-width
   characters).  Without it the result is SEED - 1, except that a SEED of
   0 is returned unchanged. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (char *string, int seed, int flags)
{
  int prev;

#if defined (HANDLE_MULTIBYTE)
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  prev = (seed != 0) ? seed - 1 : seed;
#endif

  return prev;
}
Commit	Line	Data
9255ee31 EZ	1	/* mbutil.c -- readline multibyte character utility functions */
9255ee31 EZ	2
b4f26d54	3	/* Copyright (C) 2001-2020 Free Software Foundation, Inc.
9255ee31	4
cc88a640 JK	5	This file is part of the GNU Readline Library (Readline), a library
cc88a640 JK	6	for reading lines of text with interactive input and history editing.
9255ee31	7
cc88a640 JK	8	Readline is free software: you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation, either version 3 of the License, or
9255ee31 EZ	11	(at your option) any later version.
9255ee31 EZ	12
cc88a640 JK	13	Readline is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9255ee31 EZ	16	GNU General Public License for more details.
9255ee31 EZ	17
cc88a640 JK	18	You should have received a copy of the GNU General Public License
	19	along with Readline. If not, see <http://www.gnu.org/licenses/>.
	20	*/
	21
9255ee31 EZ	22	#define READLINE_LIBRARY
	23
	24	#if defined (HAVE_CONFIG_H)
	25	# include <config.h>
	26	#endif
	27
	28	#include <sys/types.h>
	29	#include <fcntl.h>
	30	#include "posixjmp.h"
	31
	32	#if defined (HAVE_UNISTD_H)
	33	# include <unistd.h> /* for _POSIX_VERSION */
	34	#endif /* HAVE_UNISTD_H */
	35
	36	#if defined (HAVE_STDLIB_H)
	37	# include <stdlib.h>
	38	#else
	39	# include "ansi_stdlib.h"
	40	#endif /* HAVE_STDLIB_H */
	41
	42	#include <stdio.h>
	43	#include <ctype.h>
	44
	45	/* System-specific feature definitions and include files. */
	46	#include "rldefs.h"
	47	#include "rlmbutil.h"
	48
	49	#if defined (TIOCSTAT_IN_SYS_IOCTL)
	50	# include <sys/ioctl.h>
	51	#endif /* TIOCSTAT_IN_SYS_IOCTL */
	52
	53	/* Some standard library routines. */
	54	#include "readline.h"
	55
	56	#include "rlprivate.h"
	57	#include "xmalloc.h"
	58
	59	/* Declared here so it can be shared between the readline and history
	60	libraries. */
	61	#if defined (HANDLE_MULTIBYTE)
	62	int rl_byte_oriented = 0;
	63	#else
	64	int rl_byte_oriented = 1;
	65	#endif
	66
775e241e TT	67	/* Ditto */
	68	int _rl_utf8locale = 0;
	69
9255ee31 EZ	70	/* **************************************************************** */
	71	/* */
	72	/* Multibyte Character Utility Functions */
	73	/* */
	74	/* **************************************************************** */
	75
	76	#if defined(HANDLE_MULTIBYTE)
	77
cb41b9e7 TT	78	/* **************************************************************** */
	79	/* */
	80	/* UTF-8 specific Character Utility Functions */
	81	/* */
	82	/* **************************************************************** */
	83
	84	/* Return the length in bytes of the possibly-multibyte character beginning
	85	at S. Encoding is UTF-8. */
	86	static int
	87	_rl_utf8_mblen (const char *s, size_t n)
	88	{
b4f26d54	89	unsigned char c, c1, c2, c3;
cb41b9e7 TT	90
	91	if (s == 0)
	92	return (0); /* no shift states */
	93	if (n <= 0)
	94	return (-1);
	95
	96	c = (unsigned char)*s;
	97	if (c < 0x80)
	98	return (c != 0);
	99	if (c >= 0xc2)
	100	{
	101	c1 = (unsigned char)s[1];
	102	if (c < 0xe0)
	103	{
b4f26d54 TT	104	if (n == 1)
	105	return -2;
	106	if (n >= 2 && (c1 ^ 0x80) < 0x40)
cb41b9e7 TT	107	return 2;
	108	}
	109	else if (c < 0xf0)
	110	{
b4f26d54 TT	111	if (n == 1)
	112	return -2;
	113	if ((c1 ^ 0x80) < 0x40
cb41b9e7 TT	114	&& (c >= 0xe1 || c1 >= 0xa0)
cb41b9e7 TT	115	&& (c != 0xed || c1 < 0xa0))
b4f26d54 TT	116	{
	117	if (n == 2)
	118	return -2;
	119	c2 = (unsigned char)s[2];
	120	if ((c2 ^ 0x80) < 0x40)
	121	return 3;
	122	}
cb41b9e7	123	}
b4f26d54	124	else if (c < 0xf4)
cb41b9e7	125	{
b4f26d54 TT	126	if (n == 1)
	127	return -2;
	128	if (((c1 ^ 0x80) < 0x40)
cb41b9e7 TT	129	&& (c >= 0xf1 || c1 >= 0x90)
cb41b9e7 TT	130	&& (c < 0xf4 || (c == 0xf4 && c1 < 0x90)))
b4f26d54 TT	131	{
	132	if (n == 2)
	133	return -2;
	134	c2 = (unsigned char)s[2];
	135	if ((c2 ^ 0x80) < 0x40)
	136	{
	137	if (n == 3)
	138	return -2;
	139	c3 = (unsigned char)s[3];
	140	if ((c3 ^ 0x80) < 0x40)
	141	return 4;
	142	}
	143	}
cb41b9e7 TT	144	}
	145	}
	146	/* invalid or incomplete multibyte character */
	147	return -1;
	148	}
	149
9255ee31	150	static int
cb41b9e7	151	_rl_find_next_mbchar_internal (char *string, int seed, int count, int find_non_zero)
9255ee31	152	{
cc88a640	153	size_t tmp, len;
9255ee31	154	mbstate_t ps;
5bdf8622	155	int point;
9255ee31 EZ	156	wchar_t wc;
9255ee31 EZ	157
5bdf8622 DJ	158	tmp = 0;
5bdf8622 DJ	159
9255ee31 EZ	160	memset(&ps, 0, sizeof (mbstate_t));
	161	if (seed < 0)
	162	seed = 0;
	163	if (count <= 0)
	164	return seed;
	165
5bdf8622	166	point = seed + _rl_adjust_point (string, seed, &ps);
cb41b9e7 TT	167	/* if _rl_adjust_point returns -1, the character or string is invalid.
	168	treat as a byte. */
	169	if (point == seed - 1) /* invalid */
	170	return seed + 1;
	171
cc88a640 JK	172	/* if this is true, means that seed was not pointing to a byte indicating
	173	the beginning of a multibyte character. Correct the point and consume
	174	one char. */
9255ee31	175	if (seed < point)
5bdf8622	176	count--;
9255ee31 EZ	177
	178	while (count > 0)
	179	{
cc88a640 JK	180	len = strlen (string + point);
	181	if (len == 0)
	182	break;
cb41b9e7 TT	183	if (_rl_utf8locale && UTF8_SINGLEBYTE(string[point]))
	184	{
	185	tmp = 1;
	186	wc = (wchar_t) string[point];
	187	memset(&ps, 0, sizeof(mbstate_t));
	188	}
	189	else
	190	tmp = mbrtowc (&wc, string+point, len, &ps);
5bdf8622	191	if (MB_INVALIDCH ((size_t)tmp))
9255ee31	192	{
cc88a640	193	/* invalid bytes. assume a byte represents a character */
9255ee31 EZ	194	point++;
	195	count--;
	196	/* reset states. */
	197	memset(&ps, 0, sizeof(mbstate_t));
	198	}
5bdf8622 DJ	199	else if (MB_NULLWCH (tmp))
5bdf8622 DJ	200	break; /* found wide '\0' */
9255ee31 EZ	201	else
	202	{
	203	/* valid bytes */
	204	point += tmp;
	205	if (find_non_zero)
	206	{
775e241e	207	if (WCWIDTH (wc) == 0)
9255ee31 EZ	208	continue;
	209	else
	210	count--;
	211	}
	212	else
	213	count--;
	214	}
	215	}
	216
	217	if (find_non_zero)
	218	{
	219	tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
775e241e	220	while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && WCWIDTH (wc) == 0)
9255ee31 EZ	221	{
	222	point += tmp;
	223	tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
9255ee31 EZ	224	}
9255ee31 EZ	225	}
5bdf8622 DJ	226
5bdf8622 DJ	227	return point;
9255ee31 EZ	228	}
9255ee31 EZ	229
b4f26d54 TT	230	static inline int
	231	_rl_test_nonzero (char *string, int ind, int len)
	232	{
	233	size_t tmp;
	234	wchar_t wc;
	235	mbstate_t ps;
	236
	237	memset (&ps, 0, sizeof (mbstate_t));
	238	tmp = mbrtowc (&wc, string + ind, len - ind, &ps);
	239	/* treat invalid multibyte sequences as non-zero-width */
	240	return (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp) || WCWIDTH (wc) > 0);
	241	}
	242
	243	/* experimental -- needs to handle zero-width characters better */
	244	static int
	245	_rl_find_prev_utf8char (char *string, int seed, int find_non_zero)
	246	{
	247	char *s;
	248	unsigned char b;
	249	int save, prev;
	250	size_t len;
	251
	252	if (find_non_zero)
	253	len = RL_STRLEN (string);
	254
	255	prev = seed - 1;
	256	while (prev >= 0)
	257	{
	258	b = (unsigned char)string[prev];
	259	if (UTF8_SINGLEBYTE (b))
	260	return (prev);
	261
	262	save = prev;
	263
	264	/* Move back until we're not in the middle of a multibyte char */
	265	if (UTF8_MBCHAR (b))
	266	{
	267	while (prev > 0 && (b = (unsigned char)string[--prev]) && UTF8_MBCHAR (b))
	268	;
	269	}
	270
	271	if (UTF8_MBFIRSTCHAR (b))
	272	{
	273	if (find_non_zero)
	274	{
	275	if (_rl_test_nonzero (string, prev, len))
	276	return (prev);
	277	else /* valid but WCWIDTH (wc) == 0 */
	278	prev = prev - 1;
	279	}
	280	else
	281	return (prev);
	282	}
	283	else
	284	return (save); /* invalid utf-8 multibyte sequence */
	285	}
	286
	287	return ((prev < 0) ? 0 : prev);
	288	}
	289
775e241e	290	/*static*/ int
cb41b9e7	291	_rl_find_prev_mbchar_internal (char *string, int seed, int find_non_zero)
9255ee31 EZ	292	{
	293	mbstate_t ps;
	294	int prev, non_zero_prev, point, length;
	295	size_t tmp;
	296	wchar_t wc;
	297
b4f26d54 TT	298	if (_rl_utf8locale)
	299	return (_rl_find_prev_utf8char (string, seed, find_non_zero));
	300
9255ee31 EZ	301	memset(&ps, 0, sizeof(mbstate_t));
	302	length = strlen(string);
	303
	304	if (seed < 0)
	305	return 0;
	306	else if (length < seed)
	307	return length;
	308
	309	prev = non_zero_prev = point = 0;
	310	while (point < seed)
	311	{
cb41b9e7 TT	312	if (_rl_utf8locale && UTF8_SINGLEBYTE(string[point]))
	313	{
	314	tmp = 1;
	315	wc = (wchar_t) string[point];
	316	memset(&ps, 0, sizeof(mbstate_t));
	317	}
	318	else
	319	tmp = mbrtowc (&wc, string + point, length - point, &ps);
5bdf8622	320	if (MB_INVALIDCH ((size_t)tmp))
9255ee31	321	{
cb41b9e7	322	/* in this case, bytes are invalid or too short to compose
9255ee31 EZ	323	multibyte char, so assume that the first byte represents
	324	a single character anyway. */
	325	tmp = 1;
	326	/* clear the state of the byte sequence, because
	327	in this case effect of mbstate is undefined */
	328	memset(&ps, 0, sizeof (mbstate_t));
5bdf8622 DJ	329
	330	/* Since we're assuming that this byte represents a single
	331	non-zero-width character, don't forget about it. */
	332	prev = point;
9255ee31	333	}
5bdf8622	334	else if (MB_NULLWCH (tmp))
9255ee31 EZ	335	break; /* Found '\0' char. Can this happen? */
	336	else
	337	{
	338	if (find_non_zero)
	339	{
775e241e	340	if (WCWIDTH (wc) != 0)
9255ee31 EZ	341	prev = point;
	342	}
	343	else
	344	prev = point;
	345	}
	346
	347	point += tmp;
	348	}
	349
	350	return prev;
	351	}
	352
	353	/* return the number of bytes parsed from the multibyte sequence starting
	354	at src, if a non-L'\0' wide character was recognized. It returns 0,
	355	if a L'\0' wide character was recognized. It returns (size_t)(-1),
	356	if an invalid multibyte sequence was encountered. It returns (size_t)(-2)
	357	if it couldn't parse a complete multibyte character. */
	358	int
cb41b9e7	359	_rl_get_char_len (char *src, mbstate_t *ps)
9255ee31	360	{
cb41b9e7 TT	361	size_t tmp, l;
cb41b9e7 TT	362	int mb_cur_max;
9255ee31	363
cb41b9e7 TT	364	/* Look at no more than MB_CUR_MAX characters */
	365	l = (size_t)strlen (src);
	366	if (_rl_utf8locale && l > 0 && UTF8_SINGLEBYTE(*src))
	367	tmp = (*src != 0) ? 1 : 0;
	368	else
	369	{
	370	mb_cur_max = MB_CUR_MAX;
	371	tmp = mbrlen((const char *)src, (l < mb_cur_max) ? l : mb_cur_max, ps);
	372	}
9255ee31 EZ	373	if (tmp == (size_t)(-2))
9255ee31 EZ	374	{
cb41b9e7	375	/* too short to compose multibyte char */
5af408ce EZ	376	if (ps)
5af408ce EZ	377	memset (ps, 0, sizeof(mbstate_t));
9255ee31 EZ	378	return -2;
	379	}
	380	else if (tmp == (size_t)(-1))
	381	{
	382	/* invalid to compose multibyte char */
	383	/* initialize the conversion state */
5af408ce EZ	384	if (ps)
5af408ce EZ	385	memset (ps, 0, sizeof(mbstate_t));
9255ee31 EZ	386	return -1;
	387	}
	388	else if (tmp == (size_t)0)
	389	return 0;
	390	else
	391	return (int)tmp;
	392	}
	393
	394	/* compare the specified two characters. If the characters matched,
	395	return 1. Otherwise return 0. */
	396	int
cb41b9e7	397	_rl_compare_chars (char *buf1, int pos1, mbstate_t *ps1, char *buf2, int pos2, mbstate_t *ps2)
9255ee31 EZ	398	{
	399	int i, w1, w2;
	400
	401	if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 ||
	402	(w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 ||
	403	(w1 != w2) ||
	404	(buf1[pos1] != buf2[pos2]))
	405	return 0;
	406
	407	for (i = 1; i < w1; i++)
	408	if (buf1[pos1+i] != buf2[pos2+i])
	409	return 0;
	410
	411	return 1;
	412	}
	413
	414	/* adjust pointed byte and find mbstate of the point of string.
	415	adjusted point will be point <= adjusted_point, and returns
	416	differences of the byte(adjusted_point - point).
cb41b9e7	417	if point is invalid (point < 0 || more than string length),
9255ee31 EZ	418	it returns -1 */
9255ee31 EZ	419	int
cb41b9e7	420	_rl_adjust_point (char *string, int point, mbstate_t *ps)
9255ee31	421	{
cb41b9e7 TT	422	size_t tmp;
cb41b9e7 TT	423	int length, pos;
9255ee31	424
cb41b9e7 TT	425	tmp = 0;
cb41b9e7 TT	426	pos = 0;
9255ee31 EZ	427	length = strlen(string);
	428	if (point < 0)
	429	return -1;
	430	if (length < point)
	431	return -1;
	432
	433	while (pos < point)
	434	{
cb41b9e7 TT	435	if (_rl_utf8locale && UTF8_SINGLEBYTE(string[pos]))
	436	tmp = 1;
	437	else
	438	tmp = mbrlen (string + pos, length - pos, ps);
5bdf8622	439	if (MB_INVALIDCH ((size_t)tmp))
9255ee31	440	{
cb41b9e7	441	/* in this case, bytes are invalid or too short to compose
9255ee31 EZ	442	multibyte char, so assume that the first byte represents
	443	a single character anyway. */
	444	pos++;
	445	/* clear the state of the byte sequence, because
	446	in this case effect of mbstate is undefined */
5af408ce EZ	447	if (ps)
5af408ce EZ	448	memset (ps, 0, sizeof (mbstate_t));
9255ee31	449	}
5bdf8622	450	else if (MB_NULLWCH (tmp))
5af408ce	451	pos++;
9255ee31 EZ	452	else
	453	pos += tmp;
	454	}
	455
	456	return (pos - point);
	457	}
	458
	459	int
cb41b9e7	460	_rl_is_mbchar_matched (char *string, int seed, int end, char *mbchar, int length)
9255ee31 EZ	461	{
	462	int i;
	463
	464	if ((end - seed) < length)
	465	return 0;
	466
	467	for (i = 0; i < length; i++)
	468	if (string[seed + i] != mbchar[i])
	469	return 0;
	470	return 1;
	471	}
5bdf8622 DJ	472
5bdf8622 DJ	473	wchar_t
cb41b9e7	474	_rl_char_value (char *buf, int ind)
5bdf8622 DJ	475	{
	476	size_t tmp;
	477	wchar_t wc;
	478	mbstate_t ps;
	479	int l;
	480
	481	if (MB_LEN_MAX == 1 || rl_byte_oriented)
	482	return ((wchar_t) buf[ind]);
cb41b9e7 TT	483	if (_rl_utf8locale && UTF8_SINGLEBYTE(buf[ind]))
cb41b9e7 TT	484	return ((wchar_t) buf[ind]);
5bdf8622 DJ	485	l = strlen (buf);
	486	if (ind >= l - 1)
	487	return ((wchar_t) buf[ind]);
cb41b9e7 TT	488	if (l < ind) /* Sanity check */
cb41b9e7 TT	489	l = strlen (buf+ind);
5bdf8622 DJ	490	memset (&ps, 0, sizeof (mbstate_t));
	491	tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
	492	if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
	493	return ((wchar_t) buf[ind]);
	494	return wc;
	495	}
9255ee31 EZ	496	#endif /* HANDLE_MULTIBYTE */
	497
	498	/* Find next `count' characters started byte point of the specified seed.
	499	If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte
	500	characters. */
	501	#undef _rl_find_next_mbchar
	502	int
cb41b9e7	503	_rl_find_next_mbchar (char *string, int seed, int count, int flags)
9255ee31 EZ	504	{
	505	#if defined (HANDLE_MULTIBYTE)
	506	return _rl_find_next_mbchar_internal (string, seed, count, flags);
	507	#else
	508	return (seed + count);
	509	#endif
	510	}
	511
	512	/* Find previous character started byte point of the specified seed.
	513	Returned point will be point <= seed. If flags is MB_FIND_NONZERO,
	514	we look for non-zero-width multibyte characters. */
	515	#undef _rl_find_prev_mbchar
	516	int
cb41b9e7	517	_rl_find_prev_mbchar (char *string, int seed, int flags)
9255ee31 EZ	518	{
	519	#if defined (HANDLE_MULTIBYTE)
	520	return _rl_find_prev_mbchar_internal (string, seed, flags);
	521	#else
	522	return ((seed == 0) ? seed : seed - 1);
	523	#endif
	524	}