* gdbarch.h (target_gdbarch): Add comment documenting this global.
[gdb/SamB.git] / readline / mbutil.c
blob17dde53ed7b4636cd7904fb203bcabc6b9f7a6c0
1 /* mbutil.c -- readline multibyte character utility functions */
3 /* Copyright (C) 2001-2005 Free Software Foundation, Inc.
5 This file is part of the GNU Readline Library, a library for
6 reading lines of text with interactive input and history editing.
8 The GNU Readline Library is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2, or
11 (at your option) any later version.
13 The GNU Readline Library is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 The GNU General Public License is often shipped with GNU software, and
19 is generally kept in a file called COPYING or LICENSE. If you do not
20 have a copy of the license, write to the Free Software Foundation,
21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22 #define READLINE_LIBRARY
24 #if defined (HAVE_CONFIG_H)
25 # include <config.h>
26 #endif
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include "posixjmp.h"
32 #if defined (HAVE_UNISTD_H)
33 # include <unistd.h> /* for _POSIX_VERSION */
34 #endif /* HAVE_UNISTD_H */
36 #if defined (HAVE_STDLIB_H)
37 # include <stdlib.h>
38 #else
39 # include "ansi_stdlib.h"
40 #endif /* HAVE_STDLIB_H */
42 #include <stdio.h>
43 #include <ctype.h>
45 /* System-specific feature definitions and include files. */
46 #include "rldefs.h"
47 #include "rlmbutil.h"
49 #if defined (TIOCSTAT_IN_SYS_IOCTL)
50 # include <sys/ioctl.h>
51 #endif /* TIOCSTAT_IN_SYS_IOCTL */
53 /* Some standard library routines. */
54 #include "readline.h"
56 #include "rlprivate.h"
57 #include "xmalloc.h"
/* Defined in this file so that the readline and history libraries can
   share a single copy.  The initial value is 1 when multibyte support is
   not compiled in and 0 when HANDLE_MULTIBYTE is defined. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif
67 /* **************************************************************** */
68 /* */
69 /* Multibyte Character Utility Functions */
70 /* */
71 /* **************************************************************** */
73 #if defined(HANDLE_MULTIBYTE)
/* Return the byte offset in STRING reached by moving forward COUNT
   characters from byte offset SEED.

   A negative SEED is treated as 0.  If COUNT is not positive, SEED is
   returned unchanged.  If SEED lies beyond the end of STRING, the length
   of STRING is returned.  If SEED falls inside a multibyte character it
   is first moved forward to the next character boundary, and that move
   consumes one of the COUNT characters.  Bytes that do not form a valid
   multibyte character are each counted as one character.

   If FIND_NON_ZERO is non-zero, zero-width characters are stepped over
   without being counted, and any zero-width characters immediately
   following the final position are skipped as well.  The returned offset
   never moves past the terminating NUL of STRING. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, len;
  mbstate_t ps;
  int point, adjust;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  /* _rl_adjust_point returns -1 when SEED is past the end of STRING.
     Adding that to SEED would leave POINT outside the string, so clamp
     to the end instead. */
  adjust = _rl_adjust_point (string, seed, &ps);
  if (adjust < 0)
    return (int) strlen (string);

  point = seed + adjust;
  /* If POINT moved, SEED was not on the first byte of a character.
     Moving to the boundary uses up one character of COUNT. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      /* Stop at the end of the string.  Calling mbrtowc with a zero
         length may report an incomplete character, which would otherwise
         be handled as an invalid byte and push POINT past the NUL. */
      len = strlen (string + point);
      if (len == 0)
        break;

      tmp = mbrtowc (&wc, string + point, len, &ps);
      if (MB_INVALIDCH (tmp))
        {
          /* Invalid bytes: assume one byte represents one character and
             reset the conversion state. */
          point++;
          count--;
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* found wide '\0' */
      else
        {
          /* Valid character.  Zero-width characters are only counted
             when the caller did not ask for non-zero-width ones. */
          point += tmp;
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters that follow the final position.  The
         conversion result is checked before WC is examined, because WC
         is not set when mbrtowc reports an error. */
      for (;;)
        {
          tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
          if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp))
            break;
          if (wcwidth (wc) != 0)
            break;
          point += tmp;
        }
    }

  return point;
}
/* Return the byte offset of the start of the character that precedes
   byte offset SEED in STRING.  Returns 0 if SEED is negative and the
   length of STRING if SEED is larger than that length.  The string is
   scanned from its beginning; bytes that do not form a valid multibyte
   character are each treated as a single character.  If FIND_NON_ZERO is
   non-zero, only characters whose wcwidth is not 0 are remembered as
   candidate positions. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;
  int total, pos, last_start;

  memset (&state, 0, sizeof (mbstate_t));
  total = strlen (string);

  if (seed < 0)
    return 0;
  if (seed > total)
    return total;

  last_start = 0;
  for (pos = 0; pos < seed; pos += nbytes)
    {
      nbytes = mbrtowc (&wch, string + pos, total - pos, &state);
      if (MB_INVALIDCH (nbytes))
        {
          /* Invalid or truncated sequence: the conversion state is
             undefined now, so start it afresh and count this one byte
             as a non-zero-width character. */
          memset (&state, 0, sizeof (mbstate_t));
          nbytes = 1;
          last_start = pos;
        }
      else if (MB_NULLWCH (nbytes))
        break;                  /* reached a wide '\0' */
      else if (find_non_zero == 0 || wcwidth (wch) != 0)
        last_start = pos;
    }

  return last_start;
}
/* Return the number of bytes in the multibyte character starting at SRC.
   Returns 0 if SRC points at the terminating NUL (an empty string),
   -1 if the bytes at SRC form an invalid multibyte sequence, and -2 if
   they form only an incomplete multibyte character.  PS is the
   conversion state handed to mbrlen; if it is non-null it is reset to
   all zeros whenever 0, -1 or -2 is returned for those cases. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t tmp, len;

  len = strlen (src);
  if (len == 0)
    {
      /* mbrlen would be called with a length of 0 and so could never see
         the NUL itself; what it returns for empty input differs between
         C libraries.  Report the null character directly. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return 0;
    }

  tmp = mbrlen ((const char *)src, len, ps);
  if (tmp == (size_t)(-2) || tmp == (size_t)(-1))
    {
      /* Incomplete (-2) or invalid (-1) sequence: reinitialize the
         conversion state before reporting the failure. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (tmp == (size_t)(-2)) ? -2 : -1;
    }

  return (int)tmp;
}
/* Compare the character at offset POS1 in BUF1 with the character at
   offset POS2 in BUF2.  PS1 and PS2 are the conversion states passed to
   _rl_get_char_len for the respective buffers.  Return 1 if both
   characters have the same positive byte length and identical bytes,
   otherwise return 0.  The second character is not measured when the
   first one has no positive length. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int width, other, k;

  width = _rl_get_char_len (&buf1[pos1], ps1);
  if (width <= 0)
    return 0;

  other = _rl_get_char_len (&buf2[pos2], ps2);
  if (other <= 0 || other != width)
    return 0;

  /* Same length: the characters match only if every byte does. */
  for (k = 0; k < width; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
        return 0;
    }

  return 1;
}
/* Walk STRING from its beginning, one multibyte character at a time,
   until reaching or passing byte offset POINT, updating the conversion
   state PS along the way.  The adjusted position is always >= POINT, and
   the return value is the difference (adjusted position - POINT).
   Returns -1 if POINT is invalid (negative or greater than the length of
   STRING). */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t nbytes;
  int total, cursor;

  total = strlen (string);
  if (point < 0 || point > total)
    return -1;

  cursor = 0;
  while (cursor < point)
    {
      nbytes = mbrlen (string + cursor, total - cursor, ps);
      if (MB_INVALIDCH (nbytes))
        {
          /* Invalid or truncated sequence: treat the first byte as a
             character of its own, and clear the conversion state since
             its contents are undefined after the failure. */
          cursor++;
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (nbytes))
        cursor++;
      else
        cursor += nbytes;
    }

  return (cursor - point);
}
/* Return 1 if the LENGTH bytes of MBCHAR are identical to the bytes of
   STRING starting at offset SEED, and 0 otherwise.  END bounds the
   comparison: if fewer than LENGTH bytes lie between SEED and END, the
   result is 0 without comparing anything. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int k, matched;

  if (length > (end - seed))
    return 0;

  matched = 1;
  for (k = 0; matched && k < length; k++)
    matched = (string[seed + k] == mbchar[k]);

  return matched;
}
320 wchar_t
321 _rl_char_value (buf, ind)
322 char *buf;
323 int ind;
325 size_t tmp;
326 wchar_t wc;
327 mbstate_t ps;
328 int l;
330 if (MB_LEN_MAX == 1 || rl_byte_oriented)
331 return ((wchar_t) buf[ind]);
332 l = strlen (buf);
333 if (ind >= l - 1)
334 return ((wchar_t) buf[ind]);
335 memset (&ps, 0, sizeof (mbstate_t));
336 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
337 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
338 return ((wchar_t) buf[ind]);
339 return wc;
341 #endif /* HANDLE_MULTIBYTE */
/* Return the byte offset reached by moving forward COUNT characters in
   STRING from byte offset SEED.  With multibyte support compiled in, the
   work is done by _rl_find_next_mbchar_internal, and FLAGS set to
   MB_FIND_NONZERO asks for non-zero-width characters only.  Without it,
   every byte is one character and the result is simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
  int next;

#if defined (HANDLE_MULTIBYTE)
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  next = seed + count;
#endif

  return next;
}
/* Return the byte offset of the start of the character preceding byte
   offset SEED in STRING; the result is <= SEED.  With multibyte support
   compiled in, the work is done by _rl_find_prev_mbchar_internal, and
   FLAGS set to MB_FIND_NONZERO asks for non-zero-width characters only.
   Without it, the result is SEED - 1, or SEED itself when SEED is 0. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
  int prev;

#if defined (HANDLE_MULTIBYTE)
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  if (seed == 0)
    prev = seed;
  else
    prev = seed - 1;
#endif

  return prev;
}