exp2l: Work around a NetBSD 10.0/i386 bug.
[gnulib.git] / lib / unicase.in.h
blob bd705baf01755a1ea25ba4e0be43518c8cfc9293
/* Unicode character case mappings.
   Copyright (C) 2002, 2009-2024 Free Software Foundation, Inc.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 #ifndef _UNICASE_H
18 #define _UNICASE_H
20 #include "unitypes.h"
22 /* Get bool. */
23 #include <stdbool.h>
25 /* Get size_t. */
26 #include <stddef.h>
28 /* Get uninorm_t. */
29 #include "uninorm.h"
31 #if @HAVE_UNISTRING_WOE32DLL_H@
32 # include <unistring/woe32dll.h>
33 #else
34 # define LIBUNISTRING_DLL_VARIABLE
35 #endif
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
/* ========================================================================= */

/* Character case mappings.
   These mappings are locale and context independent.
   WARNING! These functions are not sufficient for languages such as German.
   Better use the functions below that treat an entire string at once and are
   language aware.  */
49 /* Return the uppercase mapping of a Unicode character. */
50 extern ucs4_t
51 uc_toupper (ucs4_t uc)
52 _UC_ATTRIBUTE_CONST;
54 /* Return the lowercase mapping of a Unicode character. */
55 extern ucs4_t
56 uc_tolower (ucs4_t uc)
57 _UC_ATTRIBUTE_CONST;
59 /* Return the titlecase mapping of a Unicode character. */
60 extern ucs4_t
61 uc_totitle (ucs4_t uc)
62 _UC_ATTRIBUTE_CONST;
/* ========================================================================= */

/* String case mappings.  */

/* These functions are locale dependent.  The iso639_language argument
   identifies the language (e.g. "tr" for Turkish).  NULL means to use
   locale independent case mappings.  */
72 /* Return the ISO 639 language code of the current locale.
73 Return "" if it is unknown, or in the "C" locale. */
74 extern const char *
75 uc_locale_language (void)
76 _UC_ATTRIBUTE_PURE;
/* Conventions:

   All functions prefixed with u8_ operate on UTF-8 encoded strings.
   Their unit is a uint8_t (1 byte).

   All functions prefixed with u16_ operate on UTF-16 encoded strings.
   Their unit is a uint16_t (a 2-byte word).

   All functions prefixed with u32_ operate on UCS-4 encoded strings.
   Their unit is a uint32_t (a 4-byte word).

   All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
   n units.

   Functions returning a string result take a (resultbuf, lengthp) argument
   pair.  If resultbuf is not NULL and the result fits into *lengthp units,
   it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
   allocated string is returned.  In both cases, *lengthp is set to the
   length (number of units) of the returned string.  In case of error,
   NULL is returned and errno is set.  */
99 /* Return the uppercase mapping of a string.
100 The nf argument identifies the normalization form to apply after the
101 case-mapping. It can also be NULL, for no normalization. */
102 extern uint8_t *
103 u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
104 uninorm_t nf,
105 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
106 extern uint16_t *
107 u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
108 uninorm_t nf,
109 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
110 extern uint32_t *
111 u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
112 uninorm_t nf,
113 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
115 /* Return the lowercase mapping of a string.
116 The nf argument identifies the normalization form to apply after the
117 case-mapping. It can also be NULL, for no normalization. */
118 extern uint8_t *
119 u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
120 uninorm_t nf,
121 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
122 extern uint16_t *
123 u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
124 uninorm_t nf,
125 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
126 extern uint32_t *
127 u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
128 uninorm_t nf,
129 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
131 /* Return the titlecase mapping of a string.
132 The nf argument identifies the normalization form to apply after the
133 case-mapping. It can also be NULL, for no normalization. */
134 extern uint8_t *
135 u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
136 uninorm_t nf,
137 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
138 extern uint16_t *
139 u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
140 uninorm_t nf,
141 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
142 extern uint32_t *
143 u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
144 uninorm_t nf,
145 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
/* The case-mapping context given by a prefix string.
   Values of this type are passed and returned by value.  */
typedef struct casing_prefix_context
{
  /* These fields are private, undocumented.  */
  uint32_t last_char_except_ignorable;
  uint32_t last_char_normal_or_above;
}
casing_prefix_context_t;
155 /* The case-mapping context of the empty prefix string. */
156 extern @GNULIB_UNICASE_EMPTY_PREFIX_CONTEXT_DLL_VARIABLE@ const casing_prefix_context_t unicase_empty_prefix_context;
157 /* Return the case-mapping context of a given prefix string. */
158 extern casing_prefix_context_t
159 u8_casing_prefix_context (const uint8_t *s, size_t n);
160 extern casing_prefix_context_t
161 u16_casing_prefix_context (const uint16_t *s, size_t n);
162 extern casing_prefix_context_t
163 u32_casing_prefix_context (const uint32_t *s, size_t n);
164 /* Return the case-mapping context of the prefix concat(A, S), given the
165 case-mapping context of the prefix A. */
166 extern casing_prefix_context_t
167 u8_casing_prefixes_context (const uint8_t *s, size_t n,
168 casing_prefix_context_t a_context);
169 extern casing_prefix_context_t
170 u16_casing_prefixes_context (const uint16_t *s, size_t n,
171 casing_prefix_context_t a_context);
172 extern casing_prefix_context_t
173 u32_casing_prefixes_context (const uint32_t *s, size_t n,
174 casing_prefix_context_t a_context);
/* The case-mapping context given by a suffix string.
   Values of this type are passed and returned by value.  */
typedef struct casing_suffix_context
{
  /* These fields are private, undocumented.  */
  uint32_t first_char_except_ignorable;
  uint32_t bits;
}
casing_suffix_context_t;
184 /* The case-mapping context of the empty suffix string. */
185 extern @GNULIB_UNICASE_EMPTY_SUFFIX_CONTEXT_DLL_VARIABLE@ const casing_suffix_context_t unicase_empty_suffix_context;
186 /* Return the case-mapping context of a given suffix string. */
187 extern casing_suffix_context_t
188 u8_casing_suffix_context (const uint8_t *s, size_t n);
189 extern casing_suffix_context_t
190 u16_casing_suffix_context (const uint16_t *s, size_t n);
191 extern casing_suffix_context_t
192 u32_casing_suffix_context (const uint32_t *s, size_t n);
193 /* Return the case-mapping context of the suffix concat(S, A), given the
194 case-mapping context of the suffix A. */
195 extern casing_suffix_context_t
196 u8_casing_suffixes_context (const uint8_t *s, size_t n,
197 casing_suffix_context_t a_context);
198 extern casing_suffix_context_t
199 u16_casing_suffixes_context (const uint16_t *s, size_t n,
200 casing_suffix_context_t a_context);
201 extern casing_suffix_context_t
202 u32_casing_suffixes_context (const uint32_t *s, size_t n,
203 casing_suffix_context_t a_context);
205 /* Return the uppercase mapping of a string that is surrounded by a prefix
206 and a suffix. */
207 extern uint8_t *
208 u8_ct_toupper (const uint8_t *s, size_t n,
209 casing_prefix_context_t prefix_context,
210 casing_suffix_context_t suffix_context,
211 const char *iso639_language,
212 uninorm_t nf,
213 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
214 extern uint16_t *
215 u16_ct_toupper (const uint16_t *s, size_t n,
216 casing_prefix_context_t prefix_context,
217 casing_suffix_context_t suffix_context,
218 const char *iso639_language,
219 uninorm_t nf,
220 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
221 extern uint32_t *
222 u32_ct_toupper (const uint32_t *s, size_t n,
223 casing_prefix_context_t prefix_context,
224 casing_suffix_context_t suffix_context,
225 const char *iso639_language,
226 uninorm_t nf,
227 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
229 /* Return the lowercase mapping of a string that is surrounded by a prefix
230 and a suffix. */
231 extern uint8_t *
232 u8_ct_tolower (const uint8_t *s, size_t n,
233 casing_prefix_context_t prefix_context,
234 casing_suffix_context_t suffix_context,
235 const char *iso639_language,
236 uninorm_t nf,
237 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
238 extern uint16_t *
239 u16_ct_tolower (const uint16_t *s, size_t n,
240 casing_prefix_context_t prefix_context,
241 casing_suffix_context_t suffix_context,
242 const char *iso639_language,
243 uninorm_t nf,
244 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
245 extern uint32_t *
246 u32_ct_tolower (const uint32_t *s, size_t n,
247 casing_prefix_context_t prefix_context,
248 casing_suffix_context_t suffix_context,
249 const char *iso639_language,
250 uninorm_t nf,
251 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
253 /* Return the titlecase mapping of a string that is surrounded by a prefix
254 and a suffix. */
255 extern uint8_t *
256 u8_ct_totitle (const uint8_t *s, size_t n,
257 casing_prefix_context_t prefix_context,
258 casing_suffix_context_t suffix_context,
259 const char *iso639_language,
260 uninorm_t nf,
261 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
262 extern uint16_t *
263 u16_ct_totitle (const uint16_t *s, size_t n,
264 casing_prefix_context_t prefix_context,
265 casing_suffix_context_t suffix_context,
266 const char *iso639_language,
267 uninorm_t nf,
268 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
269 extern uint32_t *
270 u32_ct_totitle (const uint32_t *s, size_t n,
271 casing_prefix_context_t prefix_context,
272 casing_suffix_context_t suffix_context,
273 const char *iso639_language,
274 uninorm_t nf,
275 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
277 /* Return the case folded string.
278 Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
279 to comparing S1 and S2 with uN_casecmp().
280 The nf argument identifies the normalization form to apply after the
281 case-mapping. It can also be NULL, for no normalization. */
282 extern uint8_t *
283 u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
284 uninorm_t nf,
285 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
286 extern uint16_t *
287 u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
288 uninorm_t nf,
289 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
290 extern uint32_t *
291 u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
292 uninorm_t nf,
293 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
294 /* Likewise, for a string that is surrounded by a prefix and a suffix. */
295 extern uint8_t *
296 u8_ct_casefold (const uint8_t *s, size_t n,
297 casing_prefix_context_t prefix_context,
298 casing_suffix_context_t suffix_context,
299 const char *iso639_language,
300 uninorm_t nf,
301 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
302 extern uint16_t *
303 u16_ct_casefold (const uint16_t *s, size_t n,
304 casing_prefix_context_t prefix_context,
305 casing_suffix_context_t suffix_context,
306 const char *iso639_language,
307 uninorm_t nf,
308 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
309 extern uint32_t *
310 u32_ct_casefold (const uint32_t *s, size_t n,
311 casing_prefix_context_t prefix_context,
312 casing_suffix_context_t suffix_context,
313 const char *iso639_language,
314 uninorm_t nf,
315 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
317 /* Compare S1 and S2, ignoring differences in case and normalization.
318 The nf argument identifies the normalization form to apply after the
319 case-mapping. It can also be NULL, for no normalization.
320 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
321 return 0. Upon failure, return -1 with errno set. */
322 extern int
323 u8_casecmp (const uint8_t *s1, size_t n1,
324 const uint8_t *s2, size_t n2,
325 const char *iso639_language, uninorm_t nf, int *resultp);
326 extern int
327 u16_casecmp (const uint16_t *s1, size_t n1,
328 const uint16_t *s2, size_t n2,
329 const char *iso639_language, uninorm_t nf, int *resultp);
330 extern int
331 u32_casecmp (const uint32_t *s1, size_t n1,
332 const uint32_t *s2, size_t n2,
333 const char *iso639_language, uninorm_t nf, int *resultp);
334 extern int
335 ulc_casecmp (const char *s1, size_t n1,
336 const char *s2, size_t n2,
337 const char *iso639_language, uninorm_t nf, int *resultp);
339 /* Convert the string S of length N to a NUL-terminated byte sequence, in such
340 a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
341 function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
342 NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */
343 extern char *
344 u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
345 uninorm_t nf,
346 char *_UC_RESTRICT resultbuf, size_t *lengthp);
347 extern char *
348 u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
349 uninorm_t nf,
350 char *_UC_RESTRICT resultbuf, size_t *lengthp);
351 extern char *
352 u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
353 uninorm_t nf,
354 char *_UC_RESTRICT resultbuf, size_t *lengthp);
355 extern char *
356 ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
357 uninorm_t nf,
358 char *_UC_RESTRICT resultbuf, size_t *lengthp);
360 /* Compare S1 and S2, ignoring differences in case and normalization, using the
361 collation rules of the current locale.
362 The nf argument identifies the normalization form to apply after the
363 case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. It can also
364 be NULL, for no normalization.
365 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
366 return 0. Upon failure, return -1 with errno set. */
367 extern int
368 u8_casecoll (const uint8_t *s1, size_t n1,
369 const uint8_t *s2, size_t n2,
370 const char *iso639_language, uninorm_t nf, int *resultp);
371 extern int
372 u16_casecoll (const uint16_t *s1, size_t n1,
373 const uint16_t *s2, size_t n2,
374 const char *iso639_language, uninorm_t nf, int *resultp);
375 extern int
376 u32_casecoll (const uint32_t *s1, size_t n1,
377 const uint32_t *s2, size_t n2,
378 const char *iso639_language, uninorm_t nf, int *resultp);
379 extern int
380 ulc_casecoll (const char *s1, size_t n1,
381 const char *s2, size_t n2,
382 const char *iso639_language, uninorm_t nf, int *resultp);
/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S[0..N-1] is the string to examine.  ISO639_LANGUAGE identifies the
   language; NULL means to use locale independent case mappings.  */
extern int
       u8_is_uppercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_uppercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_uppercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S[0..N-1] is the string to examine.  ISO639_LANGUAGE identifies the
   language; NULL means to use locale independent case mappings.  */
extern int
       u8_is_lowercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_lowercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_lowercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S[0..N-1] is the string to examine.  ISO639_LANGUAGE identifies the
   language; NULL means to use locale independent case mappings.  */
extern int
       u8_is_titlecase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_titlecase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_titlecase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
   false otherwise, and return 0.  Upon failure, return -1 with errno set.
   S[0..N-1] is the string to examine.  ISO639_LANGUAGE identifies the
   language; NULL means to use locale independent case mappings.  */
extern int
       u8_is_casefolded (const uint8_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u16_is_casefolded (const uint16_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
extern int
       u32_is_casefolded (const uint32_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
   either upper case or lower case or title case is not a no-op.
   Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
   under the lower case mapping, and under the title case mapping; in other
   words, when NFD(S) consists entirely of caseless characters.
   Upon failure, return -1 with errno set.
   S[0..N-1] is the string to examine.  ISO639_LANGUAGE identifies the
   language; NULL means to use locale independent case mappings.
   NOTE(review): the success return value is presumably 0, as for the other
   uN_is_* functions -- confirm.  */
extern int
       u8_is_cased (const uint8_t *s, size_t n,
                    const char *iso639_language,
                    bool *resultp);
extern int
       u16_is_cased (const uint16_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
extern int
       u32_is_cased (const uint32_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
/* ========================================================================= */
/* Close the extern "C" linkage block opened near the top of this header
   (only present when compiled as C++).  */
#ifdef __cplusplus
}
#endif
471 #endif /* _UNICASE_H */