Pull up CVS idents from FreeBSD to match our current version.
[dragonfly.git] / contrib / readline-5.0 / mbutil.c
blob9a8f17c0f74e0ca5cd0062ff2a8427dd91002599
/* mbutil.c -- readline multibyte character utility functions */

/* Copyright (C) 2001-2004 Free Software Foundation, Inc.

   This file is part of the GNU Readline Library, a library for
   reading lines of text with interactive input and history editing.

   The GNU Readline Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2, or
   (at your option) any later version.

   The GNU Readline Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   The GNU General Public License is often shipped with GNU software, and
   is generally kept in a file called COPYING or LICENSE.  If you do not
   have a copy of the license, write to the Free Software Foundation,
   59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22 #define READLINE_LIBRARY
24 #if defined (HAVE_CONFIG_H)
25 # include <config.h>
26 #endif
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include "posixjmp.h"
32 #if defined (HAVE_UNISTD_H)
33 # include <unistd.h> /* for _POSIX_VERSION */
34 #endif /* HAVE_UNISTD_H */
36 #if defined (HAVE_STDLIB_H)
37 # include <stdlib.h>
38 #else
39 # include "ansi_stdlib.h"
40 #endif /* HAVE_STDLIB_H */
42 #include <stdio.h>
43 #include <ctype.h>
45 /* System-specific feature definitions and include files. */
46 #include "rldefs.h"
47 #include "rlmbutil.h"
49 #if defined (TIOCSTAT_IN_SYS_IOCTL)
50 # include <sys/ioctl.h>
51 #endif /* TIOCSTAT_IN_SYS_IOCTL */
53 /* Some standard library routines. */
54 #include "readline.h"
56 #include "rlprivate.h"
57 #include "xmalloc.h"
/* Declared here so it can be shared between the readline and history
   libraries.  The initial value is 0 when the library is built with
   HANDLE_MULTIBYTE defined and 1 otherwise. */
#if defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 0;
#else
int rl_byte_oriented = 1;
#endif
/* **************************************************************** */
/*                                                                  */
/*           Multibyte Character Utility Functions                  */
/*                                                                  */
/* **************************************************************** */
73 #if defined(HANDLE_MULTIBYTE)
/* Starting at byte offset SEED in STRING, move forward over COUNT
   characters and return the byte offset that was reached.  A negative
   SEED is treated as 0, and SEED is returned unchanged when COUNT is not
   positive.  If SEED does not fall on the first byte of a character, the
   move up to the next character start uses up one of COUNT.  Bytes that
   do not form a valid multibyte character are counted as one character
   each.  When FIND_NON_ZERO is nonzero, characters for which wcwidth()
   returns 0 are stepped over without being counted, and any such
   characters immediately following the final position are skipped too.

   NOTE(review): _rl_adjust_point returns -1 when SEED lies beyond the end
   of STRING, and that result is added to SEED here without a check --
   confirm that callers never pass such a SEED. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp;
  mbstate_t ps;
  int point;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  point = seed + _rl_adjust_point (string, seed, &ps);
  /* If SEED was not the first byte of a character, POINT has already
     been moved forward to the next character start; that consumes one
     of the requested characters. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
      if (MB_INVALIDCH (tmp))
        {
          /* Invalid or incomplete bytes: assume a single byte represents
             a character and restart from the initial conversion state. */
          point++;
          count--;
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* found wide '\0' */
      else
        {
          /* Valid character.  Zero-width characters are not counted when
             FIND_NON_ZERO is set. */
          point += tmp;
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters that follow the final position.  WC
         is only meaningful after a successful conversion of a non-null
         character, so the mbrtowc result is checked before wcwidth() is
         consulted; adding an error value such as (size_t)-1 to POINT
         would otherwise move it backwards. */
      tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
      while (tmp != (size_t)0 && tmp != (size_t)(-1) && tmp != (size_t)(-2)
             && wcwidth (wc) == 0)
        {
          point += tmp;
          tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
        }
    }

  return point;
}
/* Return the byte offset in STRING of the start of the last character
   that begins before byte offset SEED.  The string is scanned from its
   beginning, since character boundaries can only be found going forward.
   Returns 0 when SEED is negative and the length of STRING when SEED is
   past its end.  Bytes that do not form a valid multibyte character are
   treated as one character each.  When FIND_NON_ZERO is nonzero, valid
   characters for which wcwidth() returns 0 are not considered as
   candidates for the result. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t ps;
  int prev, point, length;
  size_t tmp;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  length = strlen (string);

  if (seed < 0)
    return 0;
  else if (length < seed)
    return length;

  prev = point = 0;
  while (point < seed)
    {
      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (MB_INVALIDCH (tmp))
        {
          /* The bytes are invalid or too few to compose a multibyte
             character, so assume that the first byte represents a
             single character anyway. */
          tmp = 1;
          /* Clear the conversion state, because its contents are
             undefined after a failed conversion. */
          memset (&ps, 0, sizeof (mbstate_t));

          /* This byte is assumed to be a single non-zero-width
             character, so remember where it starts. */
          prev = point;
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* Found '\0' char.  Can this happen? */
      else
        {
          /* Remember this character's start unless zero-width characters
             are being ignored and this is one of them. */
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            prev = point;
        }

      point += tmp;
    }

  return prev;
}
/* Return the number of bytes in the multibyte character starting at SRC,
   as reported by mbrlen(), when a character other than L'\0' was
   recognized.  Return 0 if a L'\0' wide character was recognized, -1 if
   an invalid multibyte sequence was encountered, and -2 if the bytes up
   to the end of SRC do not form a complete multibyte character.  After a
   -1 or -2 result the conversion state *PS is reset to all zeros,
   provided PS is not a null pointer.

   NOTE(review): for an empty SRC the byte count handed to mbrlen() is 0;
   confirm which result callers expect in that case. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t tmp;

  tmp = mbrlen ((const char *)src, (size_t)strlen (src), ps);
  if (tmp == (size_t)(-2) || tmp == (size_t)(-1))
    {
      /* Too short or invalid to compose a multibyte character:
         reinitialize the conversion state before reporting the error. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (tmp == (size_t)(-2)) ? -2 : -1;
    }

  /* Zero for a null character, otherwise the byte length. */
  return (int)tmp;
}
/* Compare the character starting at BUF1[POS1] with the character
   starting at BUF2[POS2].  PS1 and PS2 are the conversion states handed
   to _rl_get_char_len for the respective buffers.  Return 1 if both
   positions hold a character of the same positive byte length whose
   bytes are identical.  Otherwise return 0; this includes the case where
   _rl_get_char_len reports a value <= 0 for either position. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int w1, w2;

  /* The second length is only computed when the first one is usable, so
     PS2 is left untouched if the first character cannot be measured. */
  w1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (w1 <= 0)
    return 0;

  w2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (w2 <= 0 || w1 != w2)
    return 0;

  return memcmp (&buf1[pos1], &buf2[pos2], (size_t)w1) == 0;
}
/* Adjust the byte offset POINT in STRING to a character boundary and
   leave in *PS the conversion state at that boundary.  STRING is walked
   from its beginning one character at a time until the position reaches
   or passes POINT, so the adjusted point satisfies
   POINT <= adjusted_point.  Return the difference in bytes
   (adjusted_point - POINT), which is 0 when POINT already falls on a
   boundary.  If POINT is invalid (POINT < 0 or greater than the length
   of STRING), return -1.  Bytes that do not form a valid multibyte
   character are stepped over one at a time. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t tmp;
  int length, pos;

  length = strlen (string);
  if (point < 0 || length < point)
    return -1;

  pos = 0;
  while (pos < point)
    {
      tmp = mbrlen (string + pos, length - pos, ps);
      if (MB_INVALIDCH (tmp))
        {
          /* The bytes are invalid or too few to compose a multibyte
             character, so assume that the first byte represents a
             single character anyway. */
          pos++;
          /* Clear the conversion state, because its contents are
             undefined after a failed conversion. */
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        pos++;
      else
        pos += tmp;
    }

  return (pos - point);
}
/* Return 1 if the LENGTH bytes starting at STRING[SEED] are identical to
   the first LENGTH bytes of MBCHAR, and 0 otherwise.  END is the byte
   offset that bounds the comparison: when fewer than LENGTH bytes lie
   between SEED and END the result is 0 and no bytes are examined. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int i;

  if ((end - seed) < length)
    return 0;

  for (i = 0; i < length; i++)
    {
      if (string[seed + i] != mbchar[i])
        return 0;
    }

  return 1;
}
316 #endif /* HANDLE_MULTIBYTE */
/* Find the byte offset reached by moving forward COUNT characters from
   byte offset SEED in STRING.  If FLAGS is MB_FIND_NONZERO, we look for
   non-zero-width multibyte characters.  When the library is built
   without HANDLE_MULTIBYTE every byte is one character, so the result is
   simply SEED + COUNT and STRING and FLAGS are not consulted. */
/* Presumably a header may also define this name as a macro; the #undef
   makes sure a real function is defined here -- confirm against the
   included headers. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
#if defined (HANDLE_MULTIBYTE)
  return _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  return (seed + count);
#endif
}
/* Find the byte offset of the start of the character preceding byte
   offset SEED in STRING.  The returned point will be <= SEED.  If FLAGS
   is MB_FIND_NONZERO, we look for non-zero-width multibyte characters.
   When the library is built without HANDLE_MULTIBYTE every byte is one
   character, so the result is SEED - 1, or SEED itself when SEED is 0;
   STRING and FLAGS are not consulted. */
/* Presumably a header may also define this name as a macro; the #undef
   makes sure a real function is defined here -- confirm against the
   included headers. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
#if defined (HANDLE_MULTIBYTE)
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  return ((seed == 0) ? seed : seed - 1);
#endif
}