1 /* mbutil.c -- readline multibyte character utility functions */
3 /* Copyright (C) 2001-2004 Free Software Foundation, Inc.
5 This file is part of the GNU Readline Library, a library for
6 reading lines of text with interactive input and history editing.
8 The GNU Readline Library is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2, or
11 (at your option) any later version.
13 The GNU Readline Library is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 The GNU General Public License is often shipped with GNU software, and
19 is generally kept in a file called COPYING or LICENSE. If you do not
20 have a copy of the license, write to the Free Software Foundation,
21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22 #define READLINE_LIBRARY
24 #if defined (HAVE_CONFIG_H)
28 #include <sys/types.h>
32 #if defined (HAVE_UNISTD_H)
33 # include <unistd.h> /* for _POSIX_VERSION */
34 #endif /* HAVE_UNISTD_H */
36 #if defined (HAVE_STDLIB_H)
39 # include "ansi_stdlib.h"
40 #endif /* HAVE_STDLIB_H */
45 /* System-specific feature definitions and include files. */
49 #if defined (TIOCSTAT_IN_SYS_IOCTL)
50 # include <sys/ioctl.h>
51 #endif /* TIOCSTAT_IN_SYS_IOCTL */
53 /* Some standard library routines. */
56 #include "rlprivate.h"
59 /* Declared here so it can be shared between the readline and history
61 #if defined (HANDLE_MULTIBYTE)
62 int rl_byte_oriented
= 0;
64 int rl_byte_oriented
= 1;
67 /* **************************************************************** */
69 /* Multibyte Character Utility Functions */
71 /* **************************************************************** */
73 #if defined(HANDLE_MULTIBYTE)
76 _rl_find_next_mbchar_internal (string
, seed
, count
, find_non_zero
)
78 int seed
, count
, find_non_zero
;
85 memset(&ps
, 0, sizeof (mbstate_t));
91 point
= seed
+ _rl_adjust_point(string
, seed
, &ps
);
92 /* If this is true, seed did not point at the first byte of a
93 character, so correct the point and consume count. */
99 tmp
= mbrtowc (&wc
, string
+point
, strlen(string
+ point
), &ps
);
100 if (MB_INVALIDCH ((size_t)tmp
))
102 /* invalid bytes. assume a byte represents a character */
106 memset(&ps
, 0, sizeof(mbstate_t));
108 else if (MB_NULLWCH (tmp
))
109 break; /* found wide '\0' */
116 if (wcwidth (wc
) == 0)
128 tmp
= mbrtowc (&wc
, string
+ point
, strlen (string
+ point
), &ps
);
129 while (wcwidth (wc
) == 0)
132 tmp
= mbrtowc (&wc
, string
+ point
, strlen (string
+ point
), &ps
);
133 if (tmp
== (size_t)(0) || tmp
== (size_t)(-1) || tmp
== (size_t)(-2))
141 _rl_find_prev_mbchar_internal (string
, seed
, find_non_zero
)
143 int seed
, find_non_zero
;
146 int prev
, non_zero_prev
, point
, length
;
150 memset(&ps
, 0, sizeof(mbstate_t));
151 length
= strlen(string
);
155 else if (length
< seed
)
158 prev
= non_zero_prev
= point
= 0;
161 tmp
= mbrtowc (&wc
, string
+ point
, length
- point
, &ps
);
162 if (MB_INVALIDCH ((size_t)tmp
))
164 /* in this case, bytes are invalid or too short to compose a
165 multibyte char, so assume that the first byte represents
166 a single character anyway. */
168 /* clear the state of the byte sequence, because
169 in this case the resulting mbstate is undefined */
170 memset(&ps
, 0, sizeof (mbstate_t));
172 /* Since we're assuming that this byte represents a single
173 non-zero-width character, don't forget about it. */
176 else if (MB_NULLWCH (tmp
))
177 break; /* Found '\0' char. Can this happen? */
182 if (wcwidth (wc
) != 0)
195 /* return the number of bytes parsed from the multibyte sequence starting
196 at src, if a non-L'\0' wide character was recognized. It returns 0,
197 if a L'\0' wide character was recognized. It returns (size_t)(-1),
198 if an invalid multibyte sequence was encountered. It returns (size_t)(-2)
199 if it couldn't parse a complete multibyte character. */
201 _rl_get_char_len (src
, ps
)
207 tmp
= mbrlen((const char *)src
, (size_t)strlen (src
), ps
);
208 if (tmp
== (size_t)(-2))
210 /* too short to compose a multibyte char */
212 memset (ps
, 0, sizeof(mbstate_t));
215 else if (tmp
== (size_t)(-1))
217 /* invalid to compose multibyte char */
218 /* initialize the conversion state */
220 memset (ps
, 0, sizeof(mbstate_t));
223 else if (tmp
== (size_t)0)
229 /* compare the specified two characters. If the characters matched,
230 return 1. Otherwise return 0. */
232 _rl_compare_chars (buf1
, pos1
, ps1
, buf2
, pos2
, ps2
)
242 if ((w1
= _rl_get_char_len (&buf1
[pos1
], ps1
)) <= 0 ||
243 (w2
= _rl_get_char_len (&buf2
[pos2
], ps2
)) <= 0 ||
245 (buf1
[pos1
] != buf2
[pos2
]))
248 for (i
= 1; i
< w1
; i
++)
249 if (buf1
[pos1
+i
] != buf2
[pos2
+i
])
255 /* Adjust the pointed-to byte and find the mbstate at that point of string.
256 The adjusted point satisfies point <= adjusted_point, and the function
257 returns the difference in bytes (adjusted_point - point).
258 If point is invalid (point < 0 || more than string length),
261 _rl_adjust_point(string
, point
, ps
)
270 length
= strlen(string
);
278 tmp
= mbrlen (string
+ pos
, length
- pos
, ps
);
279 if (MB_INVALIDCH ((size_t)tmp
))
281 /* in this case, bytes are invalid or too short to compose a
282 multibyte char, so assume that the first byte represents
283 a single character anyway. */
285 /* clear the state of the byte sequence, because
286 in this case the resulting mbstate is undefined */
288 memset (ps
, 0, sizeof (mbstate_t));
290 else if (MB_NULLWCH (tmp
))
296 return (pos
- point
);
300 _rl_is_mbchar_matched (string
, seed
, end
, mbchar
, length
)
308 if ((end
- seed
) < length
)
311 for (i
= 0; i
< length
; i
++)
312 if (string
[seed
+ i
] != mbchar
[i
])
316 #endif /* HANDLE_MULTIBYTE */
318 /* Find the starting byte of the character `count' characters after the specified seed.
319 If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte
321 #undef _rl_find_next_mbchar
323 _rl_find_next_mbchar (string
, seed
, count
, flags
)
325 int seed
, count
, flags
;
327 #if defined (HANDLE_MULTIBYTE)
328 return _rl_find_next_mbchar_internal (string
, seed
, count
, flags
);
330 return (seed
+ count
);
334 /* Find the starting byte of the character preceding the specified seed.
335 The returned point will be <= seed. If flags is MB_FIND_NONZERO,
336 we look for non-zero-width multibyte characters. */
337 #undef _rl_find_prev_mbchar
339 _rl_find_prev_mbchar (string
, seed
, flags
)
343 #if defined (HANDLE_MULTIBYTE)
344 return _rl_find_prev_mbchar_internal (string
, seed
, flags
);
346 return ((seed
== 0) ? seed
: seed
- 1);