exp2l: Work around a NetBSD 10.0/i386 bug.
[gnulib.git] / lib / unistr.in.h
blob424678fc6fc74abd4c2edc9eb991eabdabe1b474
1 /* Elementary Unicode string functions.
2 Copyright (C) 2001-2002, 2005-2024 Free Software Foundation, Inc.
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #ifndef _UNISTR_H
18 #define _UNISTR_H
20 #include "unitypes.h"
22 /* Get bool. */
23 #include <stdbool.h>
25 /* Get size_t, ptrdiff_t. */
26 #include <stddef.h>
28 /* Get free(). */
29 #include <stdlib.h>
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
36 /* Conventions:
38 All functions prefixed with u8_ operate on UTF-8 encoded strings.
39 Their unit is a uint8_t (1 byte).
41 All functions prefixed with u16_ operate on UTF-16 encoded strings.
42 Their unit is a uint16_t (a 2-byte word).
44 All functions prefixed with u32_ operate on UCS-4 encoded strings.
45 Their unit is a uint32_t (a 4-byte word).
47 All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
48 n units.
50 All arguments starting with "str" and the arguments of functions starting
51 with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
52 which terminates at the first NUL unit. This termination unit is
53 considered part of the string for all memory allocation purposes, but
54 is not considered part of the string for all other logical purposes.
56 Functions returning a string result take a (resultbuf, lengthp) argument
57 pair. If resultbuf is not NULL and the result fits into *lengthp units,
58 it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
59 allocated string is returned. In both cases, *lengthp is set to the
60 length (number of units) of the returned string. In case of error,
61 NULL is returned and errno is set. */
64 /* Elementary string checks. */
66 /* Check whether a UTF-8 string is well-formed.
67 Return NULL if valid, or a pointer to the first invalid unit otherwise. */
68 extern const uint8_t *
69 u8_check (const uint8_t *s, size_t n)
70 _UC_ATTRIBUTE_PURE;
72 /* Check whether a UTF-16 string is well-formed.
73 Return NULL if valid, or a pointer to the first invalid unit otherwise. */
74 extern const uint16_t *
75 u16_check (const uint16_t *s, size_t n)
76 _UC_ATTRIBUTE_PURE;
78 /* Check whether a UCS-4 string is well-formed.
79 Return NULL if valid, or a pointer to the first invalid unit otherwise. */
80 extern const uint32_t *
81 u32_check (const uint32_t *s, size_t n)
82 _UC_ATTRIBUTE_PURE;
85 /* Elementary string conversions. */
87 /* Convert a UTF-8 string to a UTF-16 string. */
88 extern uint16_t *
89 u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
90 size_t *lengthp);
92 /* Convert a UTF-8 string to a UCS-4 string. */
93 extern uint32_t *
94 u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
95 size_t *lengthp);
97 /* Convert a UTF-16 string to a UTF-8 string. */
98 extern uint8_t *
99 u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
100 size_t *lengthp);
102 /* Convert a UTF-16 string to a UCS-4 string. */
103 extern uint32_t *
104 u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
105 size_t *lengthp);
107 /* Convert a UCS-4 string to a UTF-8 string. */
108 extern uint8_t *
109 u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
110 size_t *lengthp);
112 /* Convert a UCS-4 string to a UTF-16 string. */
113 extern uint16_t *
114 u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
115 size_t *lengthp);
118 /* Elementary string functions. */
120 /* Return the length (number of units) of the first character in S, which is
121 no longer than N. Return 0 if it is the NUL character. Return -1 upon
122 failure. */
123 /* Similar to mblen(), except that s must not be NULL. */
124 extern int
125 u8_mblen (const uint8_t *s, size_t n)
126 _UC_ATTRIBUTE_PURE;
127 extern int
128 u16_mblen (const uint16_t *s, size_t n)
129 _UC_ATTRIBUTE_PURE;
130 extern int
131 u32_mblen (const uint32_t *s, size_t n)
132 _UC_ATTRIBUTE_PURE;
134 /* Return the length (number of units) of the first character in S, putting
135 its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
136 and an appropriate number of units is returned.
137 The number of available units, N, must be > 0. */
138 /* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
139 and the NUL character is not treated specially. */
140 /* The variants with _unsafe suffix are for backward compatibility with
141 libunistring versions < 0.9.7. */
#if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
# else
/* Out-of-line helper, called whenever the first unit is >= 0x80.  */
extern int
u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
/* Store the first character of the UTF-8 string S (N > 0 units available)
   in *PUC and return the number of units it occupies.
   A first unit below 0x80 is stored as is and counts as one unit; any
   other first unit is handed to u8_mbtouc_unsafe_aux, whose result is
   returned unchanged.  S and PUC must not be NULL.  */
static inline int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* Fast path: decode a single-unit character without a call.  */
      *puc = c;
      return 1;
    }
  else
    return u8_mbtouc_unsafe_aux (puc, s, n);
}
# endif
#endif
#if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
# else
/* Out-of-line helper, called whenever the first unit lies in the range
   0xd800..0xdfff.  */
extern int
u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
/* Store the first character of the UTF-16 string S (N > 0 units available)
   in *PUC and return the number of units it occupies.
   A first unit outside 0xd800..0xdfff is stored as is and counts as one
   unit; any other first unit is handed to u16_mbtouc_unsafe_aux, whose
   result is returned unchanged.  S and PUC must not be NULL.  */
static inline int
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
{
  uint16_t c = *s;

  if (c < 0xd800 || c >= 0xe000)
    {
      /* Fast path: decode a single-unit character without a call.  */
      *puc = c;
      return 1;
    }
  else
    return u16_mbtouc_unsafe_aux (puc, s, n);
}
# endif
#endif
#if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
# else
/* Store the first character of the UCS-4 string S in *PUC and return the
   number of units consumed, which is always 1.
   A unit below 0xd800, or in the range 0xe000..0x10ffff, is stored as is;
   any other unit is replaced by 0xfffd.  N is not examined.  S and PUC
   must not be NULL.  */
static inline int
u32_mbtouc_unsafe (ucs4_t *puc,
                   const uint32_t *s, _GL_ATTRIBUTE_MAYBE_UNUSED size_t n)
{
  uint32_t c = *s;

  if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
    *puc = c;
  else
    /* invalid multibyte character */
    *puc = 0xfffd;
  return 1;
}
# endif
#endif
#if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
# else
/* Out-of-line helper, called whenever the first unit is >= 0x80.  */
extern int
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
/* Store the first character of the UTF-8 string S (N > 0 units available)
   in *PUC and return the number of units it occupies.
   A first unit below 0x80 is stored as is and counts as one unit; any
   other first unit is handed to u8_mbtouc_aux, whose result is returned
   unchanged.  S and PUC must not be NULL.  */
static inline int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* Fast path: decode a single-unit character without a call.  */
      *puc = c;
      return 1;
    }
  else
    return u8_mbtouc_aux (puc, s, n);
}
# endif
#endif
#if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
# else
/* Out-of-line helper, called whenever the first unit lies in the range
   0xd800..0xdfff.  */
extern int
u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
/* Store the first character of the UTF-16 string S (N > 0 units available)
   in *PUC and return the number of units it occupies.
   A first unit outside 0xd800..0xdfff is stored as is and counts as one
   unit; any other first unit is handed to u16_mbtouc_aux, whose result is
   returned unchanged.  S and PUC must not be NULL.  */
static inline int
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
{
  uint16_t c = *s;

  if (c < 0xd800 || c >= 0xe000)
    {
      /* Fast path: decode a single-unit character without a call.  */
      *puc = c;
      return 1;
    }
  else
    return u16_mbtouc_aux (puc, s, n);
}
# endif
#endif
#if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
# else
/* Store the first character of the UCS-4 string S in *PUC and return the
   number of units consumed, which is always 1.
   A unit below 0xd800, or in the range 0xe000..0x10ffff, is stored as is;
   any other unit is replaced by 0xfffd.  N is not examined.  S and PUC
   must not be NULL.  */
static inline int
u32_mbtouc (ucs4_t *puc, const uint32_t *s,
            _GL_ATTRIBUTE_MAYBE_UNUSED size_t n)
{
  uint32_t c = *s;

  if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
    *puc = c;
  else
    /* invalid multibyte character */
    *puc = 0xfffd;
  return 1;
}
# endif
#endif
277 /* Return the length (number of units) of the first character in S, putting
278 its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
279 and -1 is returned for an invalid sequence of units, -2 is returned for an
280 incomplete sequence of units.
281 The number of available units, N, must be > 0. */
282 /* Similar to u*_mbtouc(), except that the return value gives more details
283 about the failure, similar to mbrtowc(). */
285 #if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING
286 extern int
287 u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
288 #endif
290 #if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING
291 extern int
292 u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
293 #endif
295 #if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING
296 extern int
297 u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
298 #endif
300 /* Put the multibyte character represented by UC in S, returning its
301 length. Return -1 upon failure, -2 if the number of available units, N,
302 is too small. The latter case cannot occur if N >= 6/2/1, respectively. */
303 /* Similar to wctomb(), except that s must not be NULL, and the argument n
304 must be specified. */
#if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING
/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr.  */
extern int
u8_uctomb_aux (uint8_t *s, ucs4_t uc, ptrdiff_t n);
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u8_uctomb (uint8_t *s, ucs4_t uc, ptrdiff_t n);
# else
/* Write the UTF-8 representation of UC to S, which has room for N units,
   and return the number of units written.
   When UC is below 0x80 and N > 0, the single unit is stored directly and
   1 is returned.  Every other case, including N <= 0, is delegated to
   u8_uctomb_aux, whose result is returned unchanged.  S must not be
   NULL.  */
static inline int
u8_uctomb (uint8_t *s, ucs4_t uc, ptrdiff_t n)
{
  if (uc < 0x80 && n > 0)
    {
      /* Fast path: a one-unit character that is known to fit.  */
      s[0] = uc;
      return 1;
    }
  else
    return u8_uctomb_aux (s, uc, n);
}
# endif
#endif
#if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING
/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr.  */
extern int
u16_uctomb_aux (uint16_t *s, ucs4_t uc, ptrdiff_t n);
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u16_uctomb (uint16_t *s, ucs4_t uc, ptrdiff_t n);
# else
/* Write the UTF-16 representation of UC to S, which has room for N units,
   and return the number of units written.
   When UC is below 0xd800 and N > 0, the single unit is stored directly
   and 1 is returned.  Every other case, including N <= 0, is delegated to
   u16_uctomb_aux, whose result is returned unchanged.  S must not be
   NULL.  */
static inline int
u16_uctomb (uint16_t *s, ucs4_t uc, ptrdiff_t n)
{
  if (uc < 0xd800 && n > 0)
    {
      /* Fast path: a one-unit character that is known to fit.  */
      s[0] = uc;
      return 1;
    }
  else
    return u16_uctomb_aux (s, uc, n);
}
# endif
#endif
#if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING
# if !HAVE_INLINE
/* No usable 'inline': the function is only declared here.  */
extern int
u32_uctomb (uint32_t *s, ucs4_t uc, ptrdiff_t n);
# else
/* Write UC to S, which has room for N units.
   Return 1 after storing UC when it is below 0xd800 or in the range
   0xe000..0x10ffff and N > 0.
   Return -2 when UC is in one of those ranges but N <= 0.
   Return -1 for every other value of UC; nothing is stored then.
   S must not be NULL.  */
static inline int
u32_uctomb (uint32_t *s, ucs4_t uc, ptrdiff_t n)
{
  if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
    {
      if (n > 0)
        {
          *s = uc;
          return 1;
        }
      else
        /* UC is acceptable but there is no room for it.  */
        return -2;
    }
  else
    return -1;
}
# endif
#endif
374 /* Copy N units from SRC to DEST. */
375 /* Similar to memcpy(). */
376 extern uint8_t *
377 u8_cpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
378 extern uint16_t *
379 u16_cpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
380 extern uint32_t *
381 u32_cpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
383 /* Copy N units from SRC to DEST, returning pointer after last written unit. */
384 /* Similar to mempcpy(). */
385 extern uint8_t *
386 u8_pcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
387 extern uint16_t *
388 u16_pcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
389 extern uint32_t *
390 u32_pcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
392 /* Copy N units from SRC to DEST, guaranteeing correct behavior for
393 overlapping memory areas. */
394 /* Similar to memmove(). */
395 extern uint8_t *
396 u8_move (uint8_t *dest, const uint8_t *src, size_t n);
397 extern uint16_t *
398 u16_move (uint16_t *dest, const uint16_t *src, size_t n);
399 extern uint32_t *
400 u32_move (uint32_t *dest, const uint32_t *src, size_t n);
402 /* Set the first N characters of S to UC. UC should be a character that
403 occupies only 1 unit. */
404 /* Similar to memset(). */
405 extern uint8_t *
406 u8_set (uint8_t *s, ucs4_t uc, size_t n);
407 extern uint16_t *
408 u16_set (uint16_t *s, ucs4_t uc, size_t n);
409 extern uint32_t *
410 u32_set (uint32_t *s, ucs4_t uc, size_t n);
412 /* Compare S1 and S2, each of length N. */
413 /* Similar to memcmp(). */
414 extern int
415 u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n)
416 _UC_ATTRIBUTE_PURE;
417 extern int
418 u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
419 _UC_ATTRIBUTE_PURE;
420 extern int
421 u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n)
422 _UC_ATTRIBUTE_PURE;
424 /* Compare S1 and S2. */
425 /* Similar to the gnulib function memcmp2(). */
426 extern int
427 u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2)
428 _UC_ATTRIBUTE_PURE;
429 extern int
430 u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2)
431 _UC_ATTRIBUTE_PURE;
432 extern int
433 u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2)
434 _UC_ATTRIBUTE_PURE;
436 /* Search the string at S for UC. */
437 /* Similar to memchr(). */
438 extern uint8_t *
439 u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
440 _UC_ATTRIBUTE_PURE;
441 extern uint16_t *
442 u16_chr (const uint16_t *s, size_t n, ucs4_t uc)
443 _UC_ATTRIBUTE_PURE;
444 extern uint32_t *
445 u32_chr (const uint32_t *s, size_t n, ucs4_t uc)
446 _UC_ATTRIBUTE_PURE;
448 /* Count the number of Unicode characters in the N units from S. */
449 /* Similar to mbsnlen(). */
450 extern size_t
451 u8_mbsnlen (const uint8_t *s, size_t n)
452 _UC_ATTRIBUTE_PURE;
453 extern size_t
454 u16_mbsnlen (const uint16_t *s, size_t n)
455 _UC_ATTRIBUTE_PURE;
456 extern size_t
457 u32_mbsnlen (const uint32_t *s, size_t n)
458 _UC_ATTRIBUTE_PURE;
460 /* Elementary string functions with memory allocation. */
462 /* Make a freshly allocated copy of S, of length N. */
463 extern uint8_t *
464 u8_cpy_alloc (const uint8_t *s, size_t n);
465 extern uint16_t *
466 u16_cpy_alloc (const uint16_t *s, size_t n);
467 extern uint32_t *
468 u32_cpy_alloc (const uint32_t *s, size_t n);
470 /* Elementary string functions on NUL terminated strings. */
472 /* Return the length (number of units) of the first character in S.
473 Return 0 if it is the NUL character. Return -1 upon failure. */
474 extern int
475 u8_strmblen (const uint8_t *s)
476 _UC_ATTRIBUTE_PURE;
477 extern int
478 u16_strmblen (const uint16_t *s)
479 _UC_ATTRIBUTE_PURE;
480 extern int
481 u32_strmblen (const uint32_t *s)
482 _UC_ATTRIBUTE_PURE;
484 /* Return the length (number of units) of the first character in S, putting
485 its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL
486 character. Return -1 upon failure. */
487 extern int
488 u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
489 extern int
490 u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
491 extern int
492 u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
494 /* Forward iteration step. Advances the pointer past the next character,
495 or returns NULL if the end of the string has been reached. Puts the
496 character's 'ucs4_t' representation in *PUC. */
497 extern const uint8_t *
498 u8_next (ucs4_t *puc, const uint8_t *s);
499 extern const uint16_t *
500 u16_next (ucs4_t *puc, const uint16_t *s);
501 extern const uint32_t *
502 u32_next (ucs4_t *puc, const uint32_t *s);
504 /* Backward iteration step. Advances the pointer to point to the previous
505 character, or returns NULL if the beginning of the string had been reached.
506 Puts the character's 'ucs4_t' representation in *PUC. */
507 extern const uint8_t *
508 u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
509 extern const uint16_t *
510 u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
511 extern const uint32_t *
512 u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
514 /* Return the number of units in S. */
515 /* Similar to strlen(), wcslen(). */
516 extern size_t
517 u8_strlen (const uint8_t *s)
518 _UC_ATTRIBUTE_PURE;
519 extern size_t
520 u16_strlen (const uint16_t *s)
521 _UC_ATTRIBUTE_PURE;
522 extern size_t
523 u32_strlen (const uint32_t *s)
524 _UC_ATTRIBUTE_PURE;
526 /* Return the number of units in S, but at most MAXLEN. */
527 /* Similar to strnlen(), wcsnlen(). */
528 extern size_t
529 u8_strnlen (const uint8_t *s, size_t maxlen)
530 _UC_ATTRIBUTE_PURE;
531 extern size_t
532 u16_strnlen (const uint16_t *s, size_t maxlen)
533 _UC_ATTRIBUTE_PURE;
534 extern size_t
535 u32_strnlen (const uint32_t *s, size_t maxlen)
536 _UC_ATTRIBUTE_PURE;
538 /* Copy SRC to DEST. */
539 /* Similar to strcpy(), wcscpy(). */
540 extern uint8_t *
541 u8_strcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
542 extern uint16_t *
543 u16_strcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
544 extern uint32_t *
545 u32_strcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
547 /* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */
548 /* Similar to stpcpy(). */
549 extern uint8_t *
550 u8_stpcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
551 extern uint16_t *
552 u16_stpcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
553 extern uint32_t *
554 u32_stpcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
556 /* Copy no more than N units of SRC to DEST. */
557 /* Similar to strncpy(), wcsncpy(). */
558 extern uint8_t *
559 u8_strncpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
560 extern uint16_t *
561 u16_strncpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
562 extern uint32_t *
563 u32_strncpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
565 /* Copy no more than N units of SRC to DEST. Return a pointer past the last
566 non-NUL unit written into DEST. */
567 /* Similar to stpncpy(). */
568 extern uint8_t *
569 u8_stpncpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
570 extern uint16_t *
571 u16_stpncpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
572 extern uint32_t *
573 u32_stpncpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
575 /* Append SRC onto DEST. */
576 /* Similar to strcat(), wcscat(). */
577 extern uint8_t *
578 u8_strcat (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
579 extern uint16_t *
580 u16_strcat (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
581 extern uint32_t *
582 u32_strcat (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
584 /* Append no more than N units of SRC onto DEST. */
585 /* Similar to strncat(), wcsncat(). */
586 extern uint8_t *
587 u8_strncat (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
588 extern uint16_t *
589 u16_strncat (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
590 extern uint32_t *
591 u32_strncat (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
593 /* Compare S1 and S2. */
594 /* Similar to strcmp(), wcscmp(). */
595 #ifdef __sun
596 /* Avoid a collision with the u8_strcmp() function in Solaris 11 libc. */
597 extern int
598 u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2)
599 _UC_ATTRIBUTE_PURE;
600 # define u8_strcmp u8_strcmp_gnu
601 #else
602 extern int
603 u8_strcmp (const uint8_t *s1, const uint8_t *s2)
604 _UC_ATTRIBUTE_PURE;
605 #endif
606 extern int
607 u16_strcmp (const uint16_t *s1, const uint16_t *s2)
608 _UC_ATTRIBUTE_PURE;
609 extern int
610 u32_strcmp (const uint32_t *s1, const uint32_t *s2)
611 _UC_ATTRIBUTE_PURE;
613 /* Compare S1 and S2 using the collation rules of the current locale.
614 Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
615 Upon failure, set errno and return any value. */
616 /* Similar to strcoll(), wcscoll(). */
617 extern int
618 u8_strcoll (const uint8_t *s1, const uint8_t *s2);
619 extern int
620 u16_strcoll (const uint16_t *s1, const uint16_t *s2);
621 extern int
622 u32_strcoll (const uint32_t *s1, const uint32_t *s2);
624 /* Compare no more than N units of S1 and S2. */
625 /* Similar to strncmp(), wcsncmp(). */
626 extern int
627 u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n)
628 _UC_ATTRIBUTE_PURE;
629 extern int
630 u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n)
631 _UC_ATTRIBUTE_PURE;
632 extern int
633 u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n)
634 _UC_ATTRIBUTE_PURE;
636 /* Duplicate S, returning an identical malloc'd string. */
637 /* Similar to strdup(), wcsdup(). */
638 extern uint8_t *
639 u8_strdup (const uint8_t *s)
640 _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE;
641 extern uint16_t *
642 u16_strdup (const uint16_t *s)
643 _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE;
644 extern uint32_t *
645 u32_strdup (const uint32_t *s)
646 _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE;
648 /* Find the first occurrence of UC in STR. */
649 /* Similar to strchr(), wcschr(). */
650 extern uint8_t *
651 u8_strchr (const uint8_t *str, ucs4_t uc)
652 _UC_ATTRIBUTE_PURE;
653 extern uint16_t *
654 u16_strchr (const uint16_t *str, ucs4_t uc)
655 _UC_ATTRIBUTE_PURE;
656 extern uint32_t *
657 u32_strchr (const uint32_t *str, ucs4_t uc)
658 _UC_ATTRIBUTE_PURE;
660 /* Find the last occurrence of UC in STR. */
661 /* Similar to strrchr(), wcsrchr(). */
662 extern uint8_t *
663 u8_strrchr (const uint8_t *str, ucs4_t uc)
664 _UC_ATTRIBUTE_PURE;
665 extern uint16_t *
666 u16_strrchr (const uint16_t *str, ucs4_t uc)
667 _UC_ATTRIBUTE_PURE;
668 extern uint32_t *
669 u32_strrchr (const uint32_t *str, ucs4_t uc)
670 _UC_ATTRIBUTE_PURE;
672 /* Return the length of the initial segment of STR which consists entirely
673 of Unicode characters not in REJECT. */
674 /* Similar to strcspn(), wcscspn(). */
675 extern size_t
676 u8_strcspn (const uint8_t *str, const uint8_t *reject)
677 _UC_ATTRIBUTE_PURE;
678 extern size_t
679 u16_strcspn (const uint16_t *str, const uint16_t *reject)
680 _UC_ATTRIBUTE_PURE;
681 extern size_t
682 u32_strcspn (const uint32_t *str, const uint32_t *reject)
683 _UC_ATTRIBUTE_PURE;
685 /* Return the length of the initial segment of STR which consists entirely
686 of Unicode characters in ACCEPT. */
687 /* Similar to strspn(), wcsspn(). */
688 extern size_t
689 u8_strspn (const uint8_t *str, const uint8_t *accept)
690 _UC_ATTRIBUTE_PURE;
691 extern size_t
692 u16_strspn (const uint16_t *str, const uint16_t *accept)
693 _UC_ATTRIBUTE_PURE;
694 extern size_t
695 u32_strspn (const uint32_t *str, const uint32_t *accept)
696 _UC_ATTRIBUTE_PURE;
698 /* Find the first occurrence in STR of any character in ACCEPT. */
699 /* Similar to strpbrk(), wcspbrk(). */
700 extern uint8_t *
701 u8_strpbrk (const uint8_t *str, const uint8_t *accept)
702 _UC_ATTRIBUTE_PURE;
703 extern uint16_t *
704 u16_strpbrk (const uint16_t *str, const uint16_t *accept)
705 _UC_ATTRIBUTE_PURE;
706 extern uint32_t *
707 u32_strpbrk (const uint32_t *str, const uint32_t *accept)
708 _UC_ATTRIBUTE_PURE;
710 /* Find the first occurrence of NEEDLE in HAYSTACK. */
711 /* Similar to strstr(), wcsstr(). */
712 extern uint8_t *
713 u8_strstr (const uint8_t *haystack, const uint8_t *needle)
714 _UC_ATTRIBUTE_PURE;
715 extern uint16_t *
716 u16_strstr (const uint16_t *haystack, const uint16_t *needle)
717 _UC_ATTRIBUTE_PURE;
718 extern uint32_t *
719 u32_strstr (const uint32_t *haystack, const uint32_t *needle)
720 _UC_ATTRIBUTE_PURE;
722 /* Test whether STR starts with PREFIX. */
723 extern bool
724 u8_startswith (const uint8_t *str, const uint8_t *prefix)
725 _UC_ATTRIBUTE_PURE;
726 extern bool
727 u16_startswith (const uint16_t *str, const uint16_t *prefix)
728 _UC_ATTRIBUTE_PURE;
729 extern bool
730 u32_startswith (const uint32_t *str, const uint32_t *prefix)
731 _UC_ATTRIBUTE_PURE;
733 /* Test whether STR ends with SUFFIX. */
734 extern bool
735 u8_endswith (const uint8_t *str, const uint8_t *suffix)
736 _UC_ATTRIBUTE_PURE;
737 extern bool
738 u16_endswith (const uint16_t *str, const uint16_t *suffix)
739 _UC_ATTRIBUTE_PURE;
740 extern bool
741 u32_endswith (const uint32_t *str, const uint32_t *suffix)
742 _UC_ATTRIBUTE_PURE;
744 /* Divide STR into tokens separated by characters in DELIM.
745 This interface is actually more similar to wcstok than to strtok. */
746 /* Similar to strtok_r(), wcstok(). */
747 extern uint8_t *
748 u8_strtok (uint8_t *_UC_RESTRICT str, const uint8_t *delim,
749 uint8_t **ptr);
750 extern uint16_t *
751 u16_strtok (uint16_t *_UC_RESTRICT str, const uint16_t *delim,
752 uint16_t **ptr);
753 extern uint32_t *
754 u32_strtok (uint32_t *_UC_RESTRICT str, const uint32_t *delim,
755 uint32_t **ptr);
/* Close the extern "C" block opened near the top of this header.  */
#ifdef __cplusplus
}
#endif
762 #endif /* _UNISTR_H */