malloc-h: New module.
[gnulib.git] / lib / unicase.in.h
blobd74afa132e895007c5dc6ef982f41743ea93bf84
/* Unicode character case mappings.
   Copyright (C) 2002, 2009-2020 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 #ifndef _UNICASE_H
18 #define _UNICASE_H
20 #include "unitypes.h"
22 /* Get bool. */
23 #include <stdbool.h>
25 /* Get size_t. */
26 #include <stddef.h>
28 /* Get uninorm_t. */
29 #include "uninorm.h"
/* Give the declarations below C linkage when compiled as C++.  */
#ifdef __cplusplus
extern "C" {
#endif
/* ========================================================================= */

/* Character case mappings.
   These mappings are locale and context independent.
   WARNING! These functions are not sufficient for languages such as German.
   Better use the functions below that treat an entire string at once and are
   language aware.  */
43 /* Return the uppercase mapping of a Unicode character. */
44 extern ucs4_t
45 uc_toupper (ucs4_t uc)
46 _UC_ATTRIBUTE_CONST;
48 /* Return the lowercase mapping of a Unicode character. */
49 extern ucs4_t
50 uc_tolower (ucs4_t uc)
51 _UC_ATTRIBUTE_CONST;
53 /* Return the titlecase mapping of a Unicode character. */
54 extern ucs4_t
55 uc_totitle (ucs4_t uc)
56 _UC_ATTRIBUTE_CONST;
/* ========================================================================= */

/* String case mappings.  */

/* These functions are locale dependent.  The iso639_language argument
   identifies the language (e.g. "tr" for Turkish).  NULL means to use
   locale independent case mappings.  */
66 /* Return the ISO 639 language code of the current locale.
67 Return "" if it is unknown, or in the "C" locale. */
68 extern const char *
69 uc_locale_language (void)
70 _UC_ATTRIBUTE_PURE;
/* Conventions:

   All functions prefixed with u8_ operate on UTF-8 encoded strings.
   Their unit is an uint8_t (1 byte).

   All functions prefixed with u16_ operate on UTF-16 encoded strings.
   Their unit is an uint16_t (a 2-byte word).

   All functions prefixed with u32_ operate on UCS-4 encoded strings.
   Their unit is an uint32_t (a 4-byte word).

   All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
   n units.

   Functions returning a string result take a (resultbuf, lengthp) argument
   pair.  If resultbuf is not NULL and the result fits into *lengthp units,
   it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
   allocated string is returned.  In both cases, *lengthp is set to the
   length (number of units) of the returned string.  In case of error,
   NULL is returned and errno is set.  */
93 /* Return the uppercase mapping of a string.
94 The nf argument identifies the normalization form to apply after the
95 case-mapping. It can also be NULL, for no normalization. */
96 extern uint8_t *
97 u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
98 uninorm_t nf,
99 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
100 extern uint16_t *
101 u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
102 uninorm_t nf,
103 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
104 extern uint32_t *
105 u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
106 uninorm_t nf,
107 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
109 /* Return the lowercase mapping of a string.
110 The nf argument identifies the normalization form to apply after the
111 case-mapping. It can also be NULL, for no normalization. */
112 extern uint8_t *
113 u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
114 uninorm_t nf,
115 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
116 extern uint16_t *
117 u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
118 uninorm_t nf,
119 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
120 extern uint32_t *
121 u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
122 uninorm_t nf,
123 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
125 /* Return the titlecase mapping of a string.
126 The nf argument identifies the normalization form to apply after the
127 case-mapping. It can also be NULL, for no normalization. */
128 extern uint8_t *
129 u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
130 uninorm_t nf,
131 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
132 extern uint16_t *
133 u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
134 uninorm_t nf,
135 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
136 extern uint32_t *
137 u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
138 uninorm_t nf,
139 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
/* The case-mapping context given by a prefix string.
   Values of this type are passed and returned by value.  */
typedef struct casing_prefix_context
        {
          /* These fields are private, undocumented.  */
          uint32_t last_char_except_ignorable;
          uint32_t last_char_normal_or_above;
        }
        casing_prefix_context_t;
149 /* The case-mapping context of the empty prefix string. */
150 extern const casing_prefix_context_t unicase_empty_prefix_context;
151 /* Return the case-mapping context of a given prefix string. */
152 extern casing_prefix_context_t
153 u8_casing_prefix_context (const uint8_t *s, size_t n);
154 extern casing_prefix_context_t
155 u16_casing_prefix_context (const uint16_t *s, size_t n);
156 extern casing_prefix_context_t
157 u32_casing_prefix_context (const uint32_t *s, size_t n);
158 /* Return the case-mapping context of the prefix concat(A, S), given the
159 case-mapping context of the prefix A. */
160 extern casing_prefix_context_t
161 u8_casing_prefixes_context (const uint8_t *s, size_t n,
162 casing_prefix_context_t a_context);
163 extern casing_prefix_context_t
164 u16_casing_prefixes_context (const uint16_t *s, size_t n,
165 casing_prefix_context_t a_context);
166 extern casing_prefix_context_t
167 u32_casing_prefixes_context (const uint32_t *s, size_t n,
168 casing_prefix_context_t a_context);
/* The case-mapping context given by a suffix string.
   Values of this type are passed and returned by value.  */
typedef struct casing_suffix_context
        {
          /* These fields are private, undocumented.  */
          uint32_t first_char_except_ignorable;
          uint32_t bits;
        }
        casing_suffix_context_t;
178 /* The case-mapping context of the empty suffix string. */
179 extern const casing_suffix_context_t unicase_empty_suffix_context;
180 /* Return the case-mapping context of a given suffix string. */
181 extern casing_suffix_context_t
182 u8_casing_suffix_context (const uint8_t *s, size_t n);
183 extern casing_suffix_context_t
184 u16_casing_suffix_context (const uint16_t *s, size_t n);
185 extern casing_suffix_context_t
186 u32_casing_suffix_context (const uint32_t *s, size_t n);
187 /* Return the case-mapping context of the suffix concat(S, A), given the
188 case-mapping context of the suffix A. */
189 extern casing_suffix_context_t
190 u8_casing_suffixes_context (const uint8_t *s, size_t n,
191 casing_suffix_context_t a_context);
192 extern casing_suffix_context_t
193 u16_casing_suffixes_context (const uint16_t *s, size_t n,
194 casing_suffix_context_t a_context);
195 extern casing_suffix_context_t
196 u32_casing_suffixes_context (const uint32_t *s, size_t n,
197 casing_suffix_context_t a_context);
199 /* Return the uppercase mapping of a string that is surrounded by a prefix
200 and a suffix. */
201 extern uint8_t *
202 u8_ct_toupper (const uint8_t *s, size_t n,
203 casing_prefix_context_t prefix_context,
204 casing_suffix_context_t suffix_context,
205 const char *iso639_language,
206 uninorm_t nf,
207 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
208 extern uint16_t *
209 u16_ct_toupper (const uint16_t *s, size_t n,
210 casing_prefix_context_t prefix_context,
211 casing_suffix_context_t suffix_context,
212 const char *iso639_language,
213 uninorm_t nf,
214 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
215 extern uint32_t *
216 u32_ct_toupper (const uint32_t *s, size_t n,
217 casing_prefix_context_t prefix_context,
218 casing_suffix_context_t suffix_context,
219 const char *iso639_language,
220 uninorm_t nf,
221 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
223 /* Return the lowercase mapping of a string that is surrounded by a prefix
224 and a suffix. */
225 extern uint8_t *
226 u8_ct_tolower (const uint8_t *s, size_t n,
227 casing_prefix_context_t prefix_context,
228 casing_suffix_context_t suffix_context,
229 const char *iso639_language,
230 uninorm_t nf,
231 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
232 extern uint16_t *
233 u16_ct_tolower (const uint16_t *s, size_t n,
234 casing_prefix_context_t prefix_context,
235 casing_suffix_context_t suffix_context,
236 const char *iso639_language,
237 uninorm_t nf,
238 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
239 extern uint32_t *
240 u32_ct_tolower (const uint32_t *s, size_t n,
241 casing_prefix_context_t prefix_context,
242 casing_suffix_context_t suffix_context,
243 const char *iso639_language,
244 uninorm_t nf,
245 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
247 /* Return the titlecase mapping of a string that is surrounded by a prefix
248 and a suffix. */
249 extern uint8_t *
250 u8_ct_totitle (const uint8_t *s, size_t n,
251 casing_prefix_context_t prefix_context,
252 casing_suffix_context_t suffix_context,
253 const char *iso639_language,
254 uninorm_t nf,
255 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
256 extern uint16_t *
257 u16_ct_totitle (const uint16_t *s, size_t n,
258 casing_prefix_context_t prefix_context,
259 casing_suffix_context_t suffix_context,
260 const char *iso639_language,
261 uninorm_t nf,
262 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
263 extern uint32_t *
264 u32_ct_totitle (const uint32_t *s, size_t n,
265 casing_prefix_context_t prefix_context,
266 casing_suffix_context_t suffix_context,
267 const char *iso639_language,
268 uninorm_t nf,
269 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
271 /* Return the case folded string.
272 Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
273 to comparing S1 and S2 with uN_casecmp().
274 The nf argument identifies the normalization form to apply after the
275 case-mapping. It can also be NULL, for no normalization. */
276 extern uint8_t *
277 u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
278 uninorm_t nf,
279 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
280 extern uint16_t *
281 u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
282 uninorm_t nf,
283 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
284 extern uint32_t *
285 u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
286 uninorm_t nf,
287 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
288 /* Likewise, for a string that is surrounded by a prefix and a suffix. */
289 extern uint8_t *
290 u8_ct_casefold (const uint8_t *s, size_t n,
291 casing_prefix_context_t prefix_context,
292 casing_suffix_context_t suffix_context,
293 const char *iso639_language,
294 uninorm_t nf,
295 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
296 extern uint16_t *
297 u16_ct_casefold (const uint16_t *s, size_t n,
298 casing_prefix_context_t prefix_context,
299 casing_suffix_context_t suffix_context,
300 const char *iso639_language,
301 uninorm_t nf,
302 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
303 extern uint32_t *
304 u32_ct_casefold (const uint32_t *s, size_t n,
305 casing_prefix_context_t prefix_context,
306 casing_suffix_context_t suffix_context,
307 const char *iso639_language,
308 uninorm_t nf,
309 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
311 /* Compare S1 and S2, ignoring differences in case and normalization.
312 The nf argument identifies the normalization form to apply after the
313 case-mapping. It can also be NULL, for no normalization.
314 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
315 return 0. Upon failure, return -1 with errno set. */
316 extern int
317 u8_casecmp (const uint8_t *s1, size_t n1,
318 const uint8_t *s2, size_t n2,
319 const char *iso639_language, uninorm_t nf, int *resultp);
320 extern int
321 u16_casecmp (const uint16_t *s1, size_t n1,
322 const uint16_t *s2, size_t n2,
323 const char *iso639_language, uninorm_t nf, int *resultp);
324 extern int
325 u32_casecmp (const uint32_t *s1, size_t n1,
326 const uint32_t *s2, size_t n2,
327 const char *iso639_language, uninorm_t nf, int *resultp);
328 extern int
329 ulc_casecmp (const char *s1, size_t n1,
330 const char *s2, size_t n2,
331 const char *iso639_language, uninorm_t nf, int *resultp);
333 /* Convert the string S of length N to a NUL-terminated byte sequence, in such
334 a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
335 function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
336 NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */
337 extern char *
338 u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
339 uninorm_t nf,
340 char *_UC_RESTRICT resultbuf, size_t *lengthp);
341 extern char *
342 u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
343 uninorm_t nf,
344 char *_UC_RESTRICT resultbuf, size_t *lengthp);
345 extern char *
346 u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
347 uninorm_t nf,
348 char *_UC_RESTRICT resultbuf, size_t *lengthp);
349 extern char *
350 ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
351 uninorm_t nf,
352 char *_UC_RESTRICT resultbuf, size_t *lengthp);
354 /* Compare S1 and S2, ignoring differences in case and normalization, using the
355 collation rules of the current locale.
356 The nf argument identifies the normalization form to apply after the
357 case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. It can also
358 be NULL, for no normalization.
359 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
360 return 0. Upon failure, return -1 with errno set. */
361 extern int
362 u8_casecoll (const uint8_t *s1, size_t n1,
363 const uint8_t *s2, size_t n2,
364 const char *iso639_language, uninorm_t nf, int *resultp);
365 extern int
366 u16_casecoll (const uint16_t *s1, size_t n1,
367 const uint16_t *s2, size_t n2,
368 const char *iso639_language, uninorm_t nf, int *resultp);
369 extern int
370 u32_casecoll (const uint32_t *s1, size_t n1,
371 const uint32_t *s2, size_t n2,
372 const char *iso639_language, uninorm_t nf, int *resultp);
373 extern int
374 ulc_casecoll (const char *s1, size_t n1,
375 const char *s2, size_t n2,
376 const char *iso639_language, uninorm_t nf, int *resultp);
/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units; ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish), or is NULL for locale independent case
   mappings.  */
extern int
       u8_is_uppercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_uppercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_uppercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units; ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish), or is NULL for locale independent case
   mappings.  */
extern int
       u8_is_lowercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_lowercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_lowercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
   otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units; ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish), or is NULL for locale independent case
   mappings.  */
extern int
       u8_is_titlecase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_titlecase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_titlecase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
   false otherwise, and return 0.  Upon failure, return -1 with errno set.
   S is a string of N units; ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish), or is NULL for locale independent case
   mappings.  */
extern int
       u8_is_casefolded (const uint8_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u16_is_casefolded (const uint16_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
extern int
       u32_is_casefolded (const uint32_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
   either upper case or lower case or title case is not a no-op.
   Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
   under the lower case mapping, and under the title case mapping; in other
   words, when NFD(S) consists entirely of caseless characters.
   Upon failure, return -1 with errno set.
   S is a string of N units; ISO639_LANGUAGE identifies the language
   (e.g. "tr" for Turkish), or is NULL for locale independent case
   mappings.  */
extern int
       u8_is_cased (const uint8_t *s, size_t n,
                    const char *iso639_language,
                    bool *resultp);
extern int
       u16_is_cased (const uint16_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
extern int
       u32_is_cased (const uint32_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
/* ========================================================================= */
/* Close the extern "C" block opened near the top of this header.  */
#ifdef __cplusplus
}
#endif
465 #endif /* _UNICASE_H */