1 /* mbutil.c -- readline multibyte character utility functions */
3 /* Copyright (C) 2001-2005 Free Software Foundation, Inc.
5 This file is part of the GNU Readline Library, a library for
6 reading lines of text with interactive input and history editing.
8 The GNU Readline Library is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2, or
11 (at your option) any later version.
13 The GNU Readline Library is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 The GNU General Public License is often shipped with GNU software, and
19 is generally kept in a file called COPYING or LICENSE. If you do not
20 have a copy of the license, write to the Free Software Foundation,
21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22 #define READLINE_LIBRARY
24 #if defined (HAVE_CONFIG_H)
28 #include <sys/types.h>
32 #if defined (HAVE_UNISTD_H)
33 # include <unistd.h> /* for _POSIX_VERSION */
34 #endif /* HAVE_UNISTD_H */
36 #if defined (HAVE_STDLIB_H)
39 # include "ansi_stdlib.h"
40 #endif /* HAVE_STDLIB_H */
45 /* System-specific feature definitions and include files. */
49 #if defined (TIOCSTAT_IN_SYS_IOCTL)
50 # include <sys/ioctl.h>
51 #endif /* TIOCSTAT_IN_SYS_IOCTL */
53 /* Some standard library routines. */
56 #include "rlprivate.h"
59 /* Declared here so it can be shared between the readline and history
61 #if defined (HANDLE_MULTIBYTE)
62 int rl_byte_oriented
= 0;
64 int rl_byte_oriented
= 1;
67 /* **************************************************************** */
69 /* Multibyte Character Utility Functions */
71 /* **************************************************************** */
73 #if defined(HANDLE_MULTIBYTE)
76 _rl_find_next_mbchar_internal (string
, seed
, count
, find_non_zero
)
78 int seed
, count
, find_non_zero
;
87 memset(&ps
, 0, sizeof (mbstate_t));
93 point
= seed
+ _rl_adjust_point (string
, seed
, &ps
);
94 /* if this is true, seed did not point to the starting byte of a
95 character. So correct the point and consume count */
101 tmp
= mbrtowc (&wc
, string
+point
, strlen(string
+ point
), &ps
);
102 if (MB_INVALIDCH ((size_t)tmp
))
104 /* invalid bytes. assume a byte represents a character */
108 memset(&ps
, 0, sizeof(mbstate_t));
110 else if (MB_NULLWCH (tmp
))
111 break; /* found wide '\0' */
118 if (wcwidth (wc
) == 0)
130 tmp
= mbrtowc (&wc
, string
+ point
, strlen (string
+ point
), &ps
);
131 while (tmp
> 0 && wcwidth (wc
) == 0)
134 tmp
= mbrtowc (&wc
, string
+ point
, strlen (string
+ point
), &ps
);
135 if (MB_NULLWCH (tmp
) || MB_INVALIDCH (tmp
))
144 _rl_find_prev_mbchar_internal (string
, seed
, find_non_zero
)
146 int seed
, find_non_zero
;
149 int prev
, non_zero_prev
, point
, length
;
153 memset(&ps
, 0, sizeof(mbstate_t));
154 length
= strlen(string
);
158 else if (length
< seed
)
161 prev
= non_zero_prev
= point
= 0;
164 tmp
= mbrtowc (&wc
, string
+ point
, length
- point
, &ps
);
165 if (MB_INVALIDCH ((size_t)tmp
))
167 /* in this case, bytes are invalid or too short to compose a
168 multibyte char, so assume that the first byte represents
169 a single character anyway. */
171 /* clear the state of the byte sequence, because
172 in this case the conversion state in mbstate is undefined */
173 memset(&ps
, 0, sizeof (mbstate_t));
175 /* Since we're assuming that this byte represents a single
176 non-zero-width character, don't forget about it. */
179 else if (MB_NULLWCH (tmp
))
180 break; /* Found '\0' char. Can this happen? */
185 if (wcwidth (wc
) != 0)
198 /* return the number of bytes parsed from the multibyte sequence starting
199 at src, if a non-L'\0' wide character was recognized. It returns 0,
200 if a L'\0' wide character was recognized. It returns (size_t)(-1),
201 if an invalid multibyte sequence was encountered. It returns (size_t)(-2)
202 if it couldn't parse a complete multibyte character. */
204 _rl_get_char_len (src
, ps
)
210 tmp
= mbrlen((const char *)src
, (size_t)strlen (src
), ps
);
211 if (tmp
== (size_t)(-2))
213 /* too short to compose a multibyte char */
215 memset (ps
, 0, sizeof(mbstate_t));
218 else if (tmp
== (size_t)(-1))
220 /* invalid to compose multibyte char */
221 /* initialize the conversion state */
223 memset (ps
, 0, sizeof(mbstate_t));
226 else if (tmp
== (size_t)0)
232 /* compare the specified two characters. If the characters matched,
233 return 1. Otherwise return 0. */
235 _rl_compare_chars (buf1
, pos1
, ps1
, buf2
, pos2
, ps2
)
245 if ((w1
= _rl_get_char_len (&buf1
[pos1
], ps1
)) <= 0 ||
246 (w2
= _rl_get_char_len (&buf2
[pos2
], ps2
)) <= 0 ||
248 (buf1
[pos1
] != buf2
[pos2
]))
251 for (i
= 1; i
< w1
; i
++)
252 if (buf1
[pos1
+i
] != buf2
[pos2
+i
])
258 /* adjust pointed byte and find mbstate of the point of string.
259 adjusted point will be point <= adjusted_point, and returns
260 the difference in bytes (adjusted_point - point).
261 if point is invalid (point < 0 || more than string length),
264 _rl_adjust_point(string
, point
, ps
)
273 length
= strlen(string
);
281 tmp
= mbrlen (string
+ pos
, length
- pos
, ps
);
282 if (MB_INVALIDCH ((size_t)tmp
))
284 /* in this case, bytes are invalid or too short to compose a
285 multibyte char, so assume that the first byte represents
286 a single character anyway. */
288 /* clear the state of the byte sequence, because
289 in this case the conversion state in mbstate is undefined */
291 memset (ps
, 0, sizeof (mbstate_t));
293 else if (MB_NULLWCH (tmp
))
299 return (pos
- point
);
303 _rl_is_mbchar_matched (string
, seed
, end
, mbchar
, length
)
311 if ((end
- seed
) < length
)
314 for (i
= 0; i
< length
; i
++)
315 if (string
[seed
+ i
] != mbchar
[i
])
321 _rl_char_value (buf
, ind
)
330 if (MB_LEN_MAX
== 1 || rl_byte_oriented
)
331 return ((wchar_t) buf
[ind
]);
334 return ((wchar_t) buf
[ind
]);
335 memset (&ps
, 0, sizeof (mbstate_t));
336 tmp
= mbrtowc (&wc
, buf
+ ind
, l
- ind
, &ps
);
337 if (MB_INVALIDCH (tmp
) || MB_NULLWCH (tmp
))
338 return ((wchar_t) buf
[ind
]);
341 #endif /* HANDLE_MULTIBYTE */
343 /* Find the starting byte of the character `count' characters after the specified seed.
344 If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte
346 #undef _rl_find_next_mbchar
348 _rl_find_next_mbchar (string
, seed
, count
, flags
)
350 int seed
, count
, flags
;
352 #if defined (HANDLE_MULTIBYTE)
353 return _rl_find_next_mbchar_internal (string
, seed
, count
, flags
);
355 return (seed
+ count
);
359 /* Find the starting byte of the character preceding the specified seed.
360 The returned point will be <= seed. If flags is MB_FIND_NONZERO,
361 we look for non-zero-width multibyte characters. */
362 #undef _rl_find_prev_mbchar
364 _rl_find_prev_mbchar (string
, seed
, flags
)
368 #if defined (HANDLE_MULTIBYTE)
369 return _rl_find_prev_mbchar_internal (string
, seed
, flags
);
371 return ((seed
== 0) ? seed
: seed
- 1);