c32snrtombs: Add tests.
[gnulib.git] / tests / unicase / test-u32-casefold.c
blobf1d931db993c355d844cf0209fde7b60aceff778
/* Test of casefolding mapping for UTF-32 strings.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>, 2009.  */

19 #include <config.h>
21 #include "unicase.h"
23 #include <stdlib.h>
25 #include "unistr.h"
26 #include "uninorm.h"
27 #include "macros.h"
29 static int
30 check (const uint32_t *input, size_t input_length,
31 const char *iso639_language, uninorm_t nf,
32 const uint32_t *expected, size_t expected_length)
34 size_t length;
35 uint32_t *result;
37 /* Test return conventions with resultbuf == NULL. */
38 result = u32_casefold (input, input_length, iso639_language, nf, NULL, &length);
39 if (!(result != NULL))
40 return 1;
41 if (!(length == expected_length))
42 return 2;
43 if (!(u32_cmp (result, expected, expected_length) == 0))
44 return 3;
45 free (result);
47 /* Test return conventions with resultbuf too small. */
48 if (expected_length > 0)
50 uint32_t *preallocated;
52 length = expected_length - 1;
53 preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
54 result = u32_casefold (input, input_length, iso639_language, nf, preallocated, &length);
55 if (!(result != NULL))
56 return 4;
57 if (!(result != preallocated))
58 return 5;
59 if (!(length == expected_length))
60 return 6;
61 if (!(u32_cmp (result, expected, expected_length) == 0))
62 return 7;
63 free (result);
64 free (preallocated);
67 /* Test return conventions with resultbuf large enough. */
69 uint32_t *preallocated;
71 length = expected_length;
72 preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
73 result = u32_casefold (input, input_length, iso639_language, nf, preallocated, &length);
74 if (!(result != NULL))
75 return 8;
76 if (!(preallocated == NULL || result == preallocated))
77 return 9;
78 if (!(length == expected_length))
79 return 10;
80 if (!(u32_cmp (result, expected, expected_length) == 0))
81 return 11;
82 free (preallocated);
85 return 0;
88 int
89 main ()
91 { /* Empty string. */
92 ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0);
93 ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0);
96 /* Simple string. */
97 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
98 static const uint32_t input[] =
99 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
100 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
101 0x0439, 0x0442, 0x0435, '!', ' ',
102 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
103 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
104 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
106 static const uint32_t casefolded[] =
107 { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ',
108 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
109 0x0439, 0x0442, 0x0435, '!', ' ',
110 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
111 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
112 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
114 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
117 /* Case mapping can increase the number of Unicode characters. */
118 { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
119 static const uint32_t input[] = { 0x0149 };
120 static const uint32_t casefolded[] = { 0x02BC, 0x006E };
121 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
123 { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */
124 static const uint32_t input[] = { 0x0390 };
125 static const uint32_t casefolded[] = { 0x03B9, 0x0308, 0x0301 };
126 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
129 /* Turkish letters i İ ı I */
130 { /* LATIN CAPITAL LETTER I */
131 static const uint32_t input[] = { 0x0049 };
132 static const uint32_t casefolded[] = { 0x0069 };
133 static const uint32_t casefolded_tr[] = { 0x0131 };
134 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
135 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0);
137 { /* LATIN SMALL LETTER I */
138 static const uint32_t input[] = { 0x0069 };
139 static const uint32_t casefolded[] = { 0x0069 };
140 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
141 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0);
143 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
144 static const uint32_t input[] = { 0x0130 };
145 static const uint32_t casefolded[] = { 0x0069, 0x0307 };
146 static const uint32_t casefolded_tr[] = { 0x0069 };
147 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
148 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0);
150 { /* LATIN SMALL LETTER DOTLESS I */
151 static const uint32_t input[] = { 0x0131 };
152 static const uint32_t casefolded[] = { 0x0131 };
153 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
154 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0);
156 { /* "topkapı" */
157 static const uint32_t input[] =
158 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
159 static const uint32_t casefolded[] =
160 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
161 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
162 ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0);
165 /* Uppercasing can increase the number of Unicode characters. */
166 { /* "heiß" */
167 static const uint32_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
168 static const uint32_t casefolded[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 };
169 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
172 /* Case mappings for some characters can depend on the surrounding characters. */
173 { /* "περισσότερες πληροφορίες" */
174 static const uint32_t input[] =
176 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
177 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
178 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
180 static const uint32_t casefolded[] =
182 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
183 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7,
184 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3
186 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
189 /* Case mapping can require subsequent normalization. */
190 { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
191 static const uint32_t input[] = { 0x01F0, 0x0323 };
192 static const uint32_t casefolded[] = { 0x006A, 0x030C, 0x0323 };
193 static const uint32_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C };
194 static const uint32_t casefolded_normalized[] = { 0x01F0, 0x0323 };
195 ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
196 ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFD, casefolded_decomposed, SIZEOF (casefolded_decomposed)) == 0);
197 ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casefolded_normalized, SIZEOF (casefolded_normalized)) == 0);
200 return 0;