Update.
[shishi.git] / gl / string_.h
blob a7c0c23f4a437af3a84c08201852e5ef7d8b787a
1 /* A GNU-like <string.h>.
3 Copyright (C) 1995-1996, 2001-2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 #ifdef __DECC
20 # include_next <string.h>
21 #endif
23 #ifndef _GL_STRING_H
24 #define _GL_STRING_H
26 /* This #pragma avoids a warning with "gcc -Wmissing-prototypes" on some
27 mingw systems. */
28 #ifdef __GNUC__
29 # pragma GCC system_header
30 #endif
32 #ifndef __DECC
33 # include @ABSOLUTE_STRING_H@
34 #endif
37 /* The definition of GL_LINK_WARNING is copied here. */
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
44 /* Return the first occurrence of NEEDLE in HAYSTACK. */
45 #if @GNULIB_MEMMEM@
46 # if ! @HAVE_DECL_MEMMEM@
47 extern void *memmem (void const *__haystack, size_t __haystack_len,
48 void const *__needle, size_t __needle_len);
49 # endif
50 #elif defined GNULIB_POSIXCHECK
51 # undef memmem
52 # define memmem(a,al,b,bl) \
53 (GL_LINK_WARNING ("memmem is unportable - " \
54 "use gnulib module memmem for portability"), \
55 memmem (a, al, b, bl))
56 #endif
58 /* Copy N bytes of SRC to DEST, return pointer to bytes after the
59 last written byte. */
60 #if @GNULIB_MEMPCPY@
61 # if ! @HAVE_MEMPCPY@
62 extern void *mempcpy (void *restrict __dest, void const *restrict __src,
63 size_t __n);
64 # endif
65 #elif defined GNULIB_POSIXCHECK
66 # undef mempcpy
67 # define mempcpy(a,b,n) \
68 (GL_LINK_WARNING ("mempcpy is unportable - " \
69 "use gnulib module mempcpy for portability"), \
70 mempcpy (a, b, n))
71 #endif
73 /* Search backwards through a block for a byte (specified as an int). */
74 #if @GNULIB_MEMRCHR@
75 # if ! @HAVE_DECL_MEMRCHR@
76 extern void *memrchr (void const *, int, size_t);
77 # endif
78 #elif defined GNULIB_POSIXCHECK
79 # undef memrchr
80 # define memrchr(a,b,c) \
81 (GL_LINK_WARNING ("memrchr is unportable - " \
82 "use gnulib module memrchr for portability"), \
83 memrchr (a, b, c))
84 #endif
86 /* Copy SRC to DST, returning the address of the terminating '\0' in DST. */
87 #if @GNULIB_STPCPY@
88 # if ! @HAVE_STPCPY@
89 extern char *stpcpy (char *restrict __dst, char const *restrict __src);
90 # endif
91 #elif defined GNULIB_POSIXCHECK
92 # undef stpcpy
93 # define stpcpy(a,b) \
94 (GL_LINK_WARNING ("stpcpy is unportable - " \
95 "use gnulib module stpcpy for portability"), \
96 stpcpy (a, b))
97 #endif
99 /* Copy no more than N bytes of SRC to DST, returning a pointer past the
100 last non-NUL byte written into DST. */
101 #if @GNULIB_STPNCPY@
102 # if ! @HAVE_STPNCPY@
103 # define stpncpy gnu_stpncpy
104 extern char *stpncpy (char *restrict __dst, char const *restrict __src,
105 size_t __n);
106 # endif
107 #elif defined GNULIB_POSIXCHECK
108 # undef stpncpy
109 # define stpncpy(a,b,n) \
110 (GL_LINK_WARNING ("stpncpy is unportable - " \
111 "use gnulib module stpncpy for portability"), \
112 stpncpy (a, b, n))
113 #endif
115 /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
116 greater than zero if S1 is lexicographically less than, equal to or greater
117 than S2.
118 Note: This function does not work in multibyte locales. */
119 #if ! @HAVE_STRCASECMP@
120 extern int strcasecmp (char const *s1, char const *s2);
121 #endif
122 #if defined GNULIB_POSIXCHECK
123 /* strcasecmp() does not work with multibyte strings:
124 POSIX says that it operates on "strings", and "string" in POSIX is defined
125 as a sequence of bytes, not of characters. */
126 # undef strcasecmp
127 # define strcasecmp(a,b) \
128 (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings " \
129 "in multibyte locales - " \
130 "use mbscasecmp if you care about " \
131 "internationalization, or use c_strcasecmp (from " \
132 "gnulib module c-strcase) if you want a locale " \
133 "independent function"), \
134 strcasecmp (a, b))
135 #endif
137 /* Compare no more than N bytes of strings S1 and S2, ignoring case,
138 returning less than, equal to or greater than zero if S1 is
139 lexicographically less than, equal to or greater than S2.
140 Note: This function cannot work correctly in multibyte locales. */
141 #if ! @HAVE_DECL_STRNCASECMP@
142 extern int strncasecmp (char const *s1, char const *s2, size_t n);
143 #endif
144 #if defined GNULIB_POSIXCHECK
145 /* strncasecmp() does not work with multibyte strings:
146 POSIX says that it operates on "strings", and "string" in POSIX is defined
147 as a sequence of bytes, not of characters. */
148 # undef strncasecmp
149 # define strncasecmp(a,b,n) \
150 (GL_LINK_WARNING ("strncasecmp cannot work correctly on character " \
151 "strings in multibyte locales - " \
152 "use mbsncasecmp or mbspcasecmp if you care about " \
153 "internationalization, or use c_strncasecmp (from " \
154 "gnulib module c-strcase) if you want a locale " \
155 "independent function"), \
156 strncasecmp (a, b, n))
157 #endif
159 #if defined GNULIB_POSIXCHECK
160 /* strchr() does not work with multibyte strings if the locale encoding is
161 GB18030 and the character to be searched is a digit. */
162 # undef strchr
163 # define strchr(s,c) \
164 (GL_LINK_WARNING ("strchr cannot work correctly on character strings " \
165 "in some multibyte locales - " \
166 "use mbschr if you care about internationalization"), \
167 strchr (s, c))
168 #endif
170 /* Find the first occurrence of C in S or the final NUL byte. */
171 #if @GNULIB_STRCHRNUL@
172 # if ! @HAVE_STRCHRNUL@
173 extern char *strchrnul (char const *__s, int __c_in);
174 # endif
175 #elif defined GNULIB_POSIXCHECK
176 # undef strchrnul
177 # define strchrnul(a,b) \
178 (GL_LINK_WARNING ("strchrnul is unportable - " \
179 "use gnulib module strchrnul for portability"), \
180 strchrnul (a, b))
181 #endif
183 /* Duplicate S, returning an identical malloc'd string. */
184 #if @GNULIB_STRDUP@
185 # if ! @HAVE_DECL_STRDUP@ && ! defined strdup
186 extern char *strdup (char const *__s);
187 # endif
188 #elif defined GNULIB_POSIXCHECK
189 # undef strdup
190 # define strdup(a) \
191 (GL_LINK_WARNING ("strdup is unportable - " \
192 "use gnulib module strdup for portability"), \
193 strdup (a))
194 #endif
196 /* Return a newly allocated copy of at most N bytes of STRING. */
197 #if @GNULIB_STRNDUP@
198 # if ! @HAVE_STRNDUP@
199 # undef strndup
200 # define strndup rpl_strndup
201 # endif
202 # if ! @HAVE_STRNDUP@ || ! @HAVE_DECL_STRNDUP@
203 extern char *strndup (char const *__string, size_t __n);
204 # endif
205 #elif defined GNULIB_POSIXCHECK
206 # undef strndup
207 # define strndup(a,n) \
208 (GL_LINK_WARNING ("strndup is unportable - " \
209 "use gnulib module strndup for portability"), \
210 strndup (a, n))
211 #endif
213 /* Find the length (number of bytes) of STRING, but scan at most
214 MAXLEN bytes. If no '\0' terminator is found in that many bytes,
215 return MAXLEN. */
216 #if @GNULIB_STRNLEN@
217 # if ! @HAVE_DECL_STRNLEN@
218 extern size_t strnlen (char const *__string, size_t __maxlen);
219 # endif
220 #elif defined GNULIB_POSIXCHECK
221 # undef strnlen
222 # define strnlen(a,n) \
223 (GL_LINK_WARNING ("strnlen is unportable - " \
224 "use gnulib module strnlen for portability"), \
225 strnlen (a, n))
226 #endif
228 #if defined GNULIB_POSIXCHECK
229 /* strcspn() assumes the second argument is a list of single-byte characters.
230 Even in this simple case, it does not work with multibyte strings if the
231 locale encoding is GB18030 and one of the characters to be searched is a
232 digit. */
233 # undef strcspn
234 # define strcspn(s,a) \
235 (GL_LINK_WARNING ("strcspn cannot work correctly on character strings " \
236 "in multibyte locales - " \
237 "use mbscspn if you care about internationalization"), \
238 strcspn (s, a))
239 #endif
241 /* Find the first occurrence in S of any character in ACCEPT. */
242 #if @GNULIB_STRPBRK@
243 # if ! @HAVE_STRPBRK@
244 extern char *strpbrk (char const *__s, char const *__accept);
245 # endif
246 # if defined GNULIB_POSIXCHECK
247 /* strpbrk() assumes the second argument is a list of single-byte characters.
248 Even in this simple case, it does not work with multibyte strings if the
249 locale encoding is GB18030 and one of the characters to be searched is a
250 digit. */
251 # undef strpbrk
252 # define strpbrk(s,a) \
253 (GL_LINK_WARNING ("strpbrk cannot work correctly on character strings " \
254 "in multibyte locales - " \
255 "use mbspbrk if you care about internationalization"), \
256 strpbrk (s, a))
257 # endif
258 #elif defined GNULIB_POSIXCHECK
259 # undef strpbrk
260 # define strpbrk(s,a) \
261 (GL_LINK_WARNING ("strpbrk is unportable - " \
262 "use gnulib module strpbrk for portability"), \
263 strpbrk (s, a))
264 #endif
266 #if defined GNULIB_POSIXCHECK
267 /* strspn() assumes the second argument is a list of single-byte characters.
268 Even in this simple case, it cannot work with multibyte strings. */
269 # undef strspn
270 # define strspn(s,a) \
271 (GL_LINK_WARNING ("strspn cannot work correctly on character strings " \
272 "in multibyte locales - " \
273 "use mbsspn if you care about internationalization"), \
274 strspn (s, a))
275 #endif
277 #if defined GNULIB_POSIXCHECK
278 /* strrchr() does not work with multibyte strings if the locale encoding is
279 GB18030 and the character to be searched is a digit. */
280 # undef strrchr
281 # define strrchr(s,c) \
282 (GL_LINK_WARNING ("strrchr cannot work correctly on character strings " \
283 "in some multibyte locales - " \
284 "use mbsrchr if you care about internationalization"), \
285 strrchr (s, c))
286 #endif
288 /* Search for the next delimiter (char listed in DELIM) starting at *STRINGP.
289 If one is found, overwrite it with a NUL, and advance *STRINGP
290 to point to the next char after it. Otherwise, set *STRINGP to NULL.
291 If *STRINGP was already NULL, nothing happens.
292 Return the old value of *STRINGP.
294 This is a variant of strtok() that is multithread-safe and supports
295 empty fields.
297 Caveat: It modifies the original string.
298 Caveat: These functions cannot be used on constant strings.
299 Caveat: The identity of the delimiting character is lost.
300 Caveat: It doesn't work with multibyte strings unless all of the delimiter
301 characters are ASCII characters < 0x30.
303 See also strtok_r(). */
304 #if @GNULIB_STRSEP@
305 # if ! @HAVE_STRSEP@
306 extern char *strsep (char **restrict __stringp, char const *restrict __delim);
307 # endif
308 # if defined GNULIB_POSIXCHECK
309 # undef strsep
310 # define strsep(s,d) \
311 (GL_LINK_WARNING ("strsep cannot work correctly on character strings " \
312 "in multibyte locales - " \
313 "use mbssep if you care about internationalization"), \
314 strsep (s, d))
315 # endif
316 #elif defined GNULIB_POSIXCHECK
317 # undef strsep
318 # define strsep(s,d) \
319 (GL_LINK_WARNING ("strsep is unportable - " \
320 "use gnulib module strsep for portability"), \
321 strsep (s, d))
322 #endif
324 #if defined GNULIB_POSIXCHECK
325 /* strstr() does not work with multibyte strings if the locale encoding is
326 different from UTF-8:
327 POSIX says that it operates on "strings", and "string" in POSIX is defined
328 as a sequence of bytes, not of characters. */
329 # undef strstr
330 # define strstr(a,b) \
331 (GL_LINK_WARNING ("strstr cannot work correctly on character strings " \
332 "in most multibyte locales - " \
333 "use mbsstr if you care about internationalization"), \
334 strstr (a, b))
335 #endif
337 /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
338 comparison. */
339 #if ! @HAVE_STRCASESTR@
340 extern char *strcasestr (const char *haystack, const char *needle);
341 #endif
342 #if defined GNULIB_POSIXCHECK
343 /* strcasestr() does not work with multibyte strings:
344 It is a glibc extension, and glibc implements it only for unibyte
345 locales. The wrapper below attaches a warning and then calls strcasestr. */
346 # undef strcasestr
347 # define strcasestr(a,b) \
348 (GL_LINK_WARNING ("strcasestr cannot work correctly on character strings " \
349 "in multibyte locales - " \
350 "use mbscasestr if you care about " \
351 "internationalization, or use c-strcasestr if you want " \
352 "a locale independent function"), \
353 strcasestr (a, b))
354 #endif
356 /* Parse S into tokens separated by characters in DELIM.
357 If S is NULL, the saved pointer in SAVE_PTR is used as
358 the next starting point. For example:
359 char s[] = "-abc-=-def";
360 char *sp;
361 x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def"
362 x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL
363 x = strtok_r(NULL, "=", &sp); // x = NULL
364 // s = "-abc\0=-def\0"
366 This is a variant of strtok() that is multithread-safe.
368 For the POSIX documentation for this function, see:
369 http://www.opengroup.org/susv3xsh/strtok.html
371 Caveat: It modifies the original string.
372 Caveat: These functions cannot be used on constant strings.
373 Caveat: The identity of the delimiting character is lost.
374 Caveat: It doesn't work with multibyte strings unless all of the delimiter
375 characters are ASCII characters < 0x30.
377 See also strsep(). */
378 #if @GNULIB_STRTOK_R@
379 # if ! @HAVE_DECL_STRTOK_R@
380 extern char *strtok_r (char *restrict s, char const *restrict delim,
381 char **restrict save_ptr);
382 # endif
383 # if defined GNULIB_POSIXCHECK
384 # undef strtok_r
385 # define strtok_r(s,d,p) \
386 (GL_LINK_WARNING ("strtok_r cannot work correctly on character strings " \
387 "in multibyte locales - " \
388 "use mbstok_r if you care about internationalization"), \
389 strtok_r (s, d, p))
390 # endif
391 #elif defined GNULIB_POSIXCHECK
392 # undef strtok_r
393 # define strtok_r(s,d,p) \
394 (GL_LINK_WARNING ("strtok_r is unportable - " \
395 "use gnulib module strtok_r for portability"), \
396 strtok_r (s, d, p))
397 #endif
400 /* The following functions are not specified by POSIX. They are gnulib
401 extensions. */
403 #if @GNULIB_MBSLEN@
404 /* Return the number of multibyte characters in the character string STRING.
405 This considers multibyte characters, unlike strlen, which counts bytes. */
406 extern size_t mbslen (const char *string);
407 #endif
409 #if @GNULIB_MBSCHR@
410 /* Locate the first single-byte character C in the character string STRING,
411 and return a pointer to it. Return NULL if C is not found in STRING.
412 Unlike strchr(), this function works correctly in multibyte locales with
413 encodings such as GB18030. */
414 # define mbschr rpl_mbschr /* avoid collision with HP-UX function */
415 extern char * mbschr (const char *string, int c);
416 #endif
418 #if @GNULIB_MBSRCHR@
419 /* Locate the last single-byte character C in the character string STRING,
420 and return a pointer to it. Return NULL if C is not found in STRING.
421 Unlike strrchr(), this function works correctly in multibyte locales with
422 encodings such as GB18030. */
423 # define mbsrchr rpl_mbsrchr /* avoid collision with HP-UX function */
424 extern char * mbsrchr (const char *string, int c);
425 #endif
427 #if @GNULIB_MBSSTR@
428 /* Find the first occurrence of the character string NEEDLE in the character
429 string HAYSTACK. Return NULL if NEEDLE is not found in HAYSTACK.
430 Unlike strstr(), this function works correctly in multibyte locales with
431 encodings different from UTF-8. */
432 extern char * mbsstr (const char *haystack, const char *needle);
433 #endif
435 #if @GNULIB_MBSCASECMP@
436 /* Compare the character strings S1 and S2, ignoring case, returning less than,
437 equal to or greater than zero if S1 is lexicographically less than, equal to
438 or greater than S2.
439 Note: This function may, in multibyte locales, return 0 for strings of
440 different lengths!
441 Unlike strcasecmp(), this function works correctly in multibyte locales. */
442 extern int mbscasecmp (const char *s1, const char *s2);
443 #endif
445 #if @GNULIB_MBSNCASECMP@
446 /* Compare the initial segment of the character string S1 consisting of at most
447 N characters with the initial segment of the character string S2 consisting
448 of at most N characters, ignoring case, returning less than, equal to or
449 greater than zero if the initial segment of S1 is lexicographically less
450 than, equal to or greater than the initial segment of S2.
451 Note: This function may, in multibyte locales, return 0 for initial segments
452 of different lengths!
453 Unlike strncasecmp(), this function works correctly in multibyte locales.
454 But beware that N is not a byte count but a character count! */
455 extern int mbsncasecmp (const char *s1, const char *s2, size_t n);
456 #endif
458 #if @GNULIB_MBSPCASECMP@
459 /* Compare the initial segment of the character string STRING consisting of
460 at most mbslen (PREFIX) characters with the character string PREFIX,
461 ignoring case.
462 NOTE(review): the declared return type is 'char *', not 'int', so the result
463 cannot be a negative/zero/positive ordering value. Presumably it is a
464 pointer into STRING on a match and NULL otherwise, and a match may be
465 reported even if STRING is of smaller length than PREFIX in multibyte
466 locales - confirm against the implementation.
467 Unlike strncasecmp(), this function works correctly in multibyte locales. */
468 extern char * mbspcasecmp (const char *string, const char *prefix);
469 #endif
471 #if @GNULIB_MBSCASESTR@
472 /* Find the first occurrence of the character string NEEDLE in the character
473 string HAYSTACK, using case-insensitive comparison.
474 Note: This function may, in multibyte locales, return success even if
475 strlen (haystack) < strlen (needle) !
476 Unlike strcasestr(), this function works correctly in multibyte locales. */
477 extern char * mbscasestr (const char *haystack, const char *needle);
478 #endif
480 #if @GNULIB_MBSCSPN@
481 /* Find the first occurrence in the character string STRING of any character
482 in the character string ACCEPT. Return the number of bytes from the
483 beginning of the string to this occurrence, or to the end of the string
484 if none exists.
485 Unlike strcspn(), this function works correctly in multibyte locales. */
486 extern size_t mbscspn (const char *string, const char *accept);
487 #endif
489 #if @GNULIB_MBSPBRK@
490 /* Find the first occurrence in the character string STRING of any character
491 in the character string ACCEPT. Return the pointer to it, or NULL if none
492 exists.
493 Unlike strpbrk(), this function works correctly in multibyte locales. */
494 # define mbspbrk rpl_mbspbrk /* avoid collision with HP-UX function */
495 extern char * mbspbrk (const char *string, const char *accept);
496 #endif
498 #if @GNULIB_MBSSPN@
499 /* Find the first occurrence in the character string STRING of any character
500 not in the character string REJECT. Return the number of bytes from the
501 beginning of the string to this occurrence, or to the end of the string
502 if none exists.
503 Unlike strspn(), this function works correctly in multibyte locales. */
504 extern size_t mbsspn (const char *string, const char *reject);
505 #endif
507 #if @GNULIB_MBSSEP@
508 /* Search for the next delimiter (multibyte character listed in the character
509 string DELIM) starting at the character string *STRINGP.
510 If one is found, overwrite it with a NUL, and advance *STRINGP to point
511 to the next multibyte character after it. Otherwise, set *STRINGP to NULL.
512 If *STRINGP was already NULL, nothing happens.
513 Return the old value of *STRINGP.
515 This is a variant of mbstok_r() that supports empty fields.
517 Caveat: It modifies the original string.
518 Caveat: These functions cannot be used on constant strings.
519 Caveat: The identity of the delimiting character is lost.
521 See also mbstok_r(). */
522 extern char * mbssep (char **stringp, const char *delim);
523 #endif
525 #if @GNULIB_MBSTOK_R@
526 /* Parse the character string STRING into tokens separated by characters in
527 the character string DELIM.
528 If STRING is NULL, the saved pointer in SAVE_PTR is used as
529 the next starting point. For example:
530 char s[] = "-abc-=-def";
531 char *sp;
532 x = mbstok_r(s, "-", &sp); // x = "abc", sp = "=-def"
533 x = mbstok_r(NULL, "-=", &sp); // x = "def", sp = NULL
534 x = mbstok_r(NULL, "=", &sp); // x = NULL
535 // s = "-abc\0=-def\0"
537 Caveat: It modifies the original string.
538 Caveat: These functions cannot be used on constant strings.
539 Caveat: The identity of the delimiting character is lost.
541 See also mbssep(). */
542 extern char * mbstok_r (char *string, const char *delim, char **save_ptr);
543 #endif
546 #ifdef __cplusplus
547 } /* closes the extern "C" block opened near the top of this header */
548 #endif
550 #endif