sigprocmask: Fix configuration failure on Solaris 10 (regr. 2020-07-25).
[gnulib.git] / tests / unicase / test-u16-casecmp.c
blobd82b482bd5e83f42cb4d91a5e10d78c3c8298d77
1 /* Test of case and normalization insensitive comparison of UTF-16 strings.
2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
19 #include <config.h>
21 #include "unicase.h"
23 #include "uninorm.h"
24 #include "macros.h"
26 #define UNIT uint16_t
27 #include "test-casecmp.h"
28 #undef UNIT
30 static void
31 test_nonascii (int (*my_casecmp) (const uint16_t *, size_t, const uint16_t *, size_t, const char *, uninorm_t, int *))
33 /* Normalization effects. */
35 static const uint16_t input1[] = { 'H', 0x00F6, 'h', 'l', 'e' };
36 static const uint16_t input2[] = { 'H', 'O', 0x0308, 'h', 'L', 'e' };
37 static const uint16_t input3[] = { 'H', 0x00F6, 'h', 'l', 'e', 'n' };
38 static const uint16_t input4[] = { 'H', 'O', 0x0308, 'h', 'L', 'e', 'n' };
39 static const uint16_t input5[] = { 'H', 'u', 'r', 'z' };
40 int cmp;
42 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
43 ASSERT (cmp == 0);
45 ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, UNINORM_NFD, &cmp) == 0);
46 ASSERT (cmp == 0);
48 ASSERT (my_casecmp (input3, SIZEOF (input3), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0);
49 ASSERT (cmp == 0);
51 ASSERT (my_casecmp (input4, SIZEOF (input4), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
52 ASSERT (cmp == 0);
54 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
55 ASSERT (cmp == -1);
57 ASSERT (my_casecmp (input1, SIZEOF (input1), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0);
58 ASSERT (cmp == -1);
60 ASSERT (my_casecmp (input1, SIZEOF (input1), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0);
61 ASSERT (cmp == -1);
63 ASSERT (my_casecmp (input2, SIZEOF (input2), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0);
64 ASSERT (cmp == -1);
66 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
67 static const uint16_t input1[] = { 0x00C4 };
68 static const uint16_t input2[] = { 0x0041, 0x0308 };
69 int cmp;
71 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
72 ASSERT (cmp == 0);
74 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
75 static const uint16_t input1[] = { 0x01DE };
76 static const uint16_t input2[] = { 0x0041, 0x0308, 0x0304 };
77 int cmp;
79 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
80 ASSERT (cmp == 0);
82 { /* GREEK DIALYTIKA AND PERISPOMENI */
83 static const uint16_t input1[] = { 0x1FC1 };
84 static const uint16_t input2[] = { 0x00A8, 0x0342 };
85 int cmp;
87 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
88 ASSERT (cmp == 0);
90 { /* HANGUL SYLLABLE GEUL */
91 static const uint16_t input1[] = { 0xAE00 };
92 static const uint16_t input2[] = { 0xADF8, 0x11AF };
93 static const uint16_t input3[] = { 0x1100, 0x1173, 0x11AF };
94 int cmp;
96 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
97 ASSERT (cmp == 0);
99 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
100 ASSERT (cmp == 0);
102 { /* HANGUL SYLLABLE GEU */
103 static const uint16_t input1[] = { 0xADF8 };
104 static const uint16_t input2[] = { 0x1100, 0x1173 };
105 int cmp;
107 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
108 ASSERT (cmp == 0);
111 /* Simple string. */
112 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
113 static const uint16_t input1[] =
114 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
115 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
116 0x0439, 0x0442, 0x0435, '!', ' ',
117 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
118 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
119 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
121 static const uint16_t input2[] =
122 { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ',
123 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
124 0x0439, 0x0442, 0x0435, '!', ' ',
125 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
126 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
127 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
129 static const uint16_t input3[] =
130 { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ',
131 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423,
132 0x0419, 0x0422, 0x0415, '!', ' ',
133 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2,
134 '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ',
135 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
137 int cmp;
139 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
140 ASSERT (cmp == 0);
142 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
143 ASSERT (cmp == 0);
145 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
146 ASSERT (cmp == 0);
148 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
149 ASSERT (cmp == 0);
151 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
152 ASSERT (cmp == 0);
154 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
155 ASSERT (cmp == 0);
158 /* Case mapping can increase the number of Unicode characters. */
159 { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
160 static const uint16_t input1[] = { 0x0149 };
161 static const uint16_t input2[] = { 0x02BC, 0x006E };
162 static const uint16_t input3[] = { 0x02BC, 0x004E };
163 int cmp;
165 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
166 ASSERT (cmp == 0);
168 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
169 ASSERT (cmp == 0);
171 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
172 ASSERT (cmp == 0);
174 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0);
175 ASSERT (cmp == 0);
177 { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */
178 static const uint16_t input1[] = { 0x0390 };
179 static const uint16_t input2[] = { 0x03B9, 0x0308, 0x0301 };
180 int cmp;
182 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
183 ASSERT (cmp == 0);
185 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0);
186 ASSERT (cmp == 0);
189 /* Turkish letters i İ ı I */
190 { /* LATIN CAPITAL LETTER I */
191 static const uint16_t input[] = { 0x0049 };
192 static const uint16_t casefolded[] = { 0x0069 };
193 static const uint16_t casefolded_tr[] = { 0x0131 };
194 int cmp;
196 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
197 ASSERT (cmp == 0);
199 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0);
200 ASSERT (cmp == 0);
202 { /* LATIN SMALL LETTER I */
203 static const uint16_t input[] = { 0x0069 };
204 static const uint16_t casefolded[] = { 0x0049 };
205 static const uint16_t casefolded_tr[] = { 0x0130 };
206 int cmp;
208 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
209 ASSERT (cmp == 0);
211 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0);
212 ASSERT (cmp == 0);
214 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
215 static const uint16_t input[] = { 0x0130 };
216 static const uint16_t casefolded[] = { 0x0069, 0x0307 };
217 static const uint16_t casefolded_tr[] = { 0x0069 };
218 int cmp;
220 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
221 ASSERT (cmp == 0);
223 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0);
224 ASSERT (cmp == 0);
226 { /* LATIN SMALL LETTER DOTLESS I */
227 static const uint16_t input[] = { 0x0131 };
228 static const uint16_t casefolded[] = { 0x0049 };
229 int cmp;
231 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
232 ASSERT (cmp == 1);
234 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0);
235 ASSERT (cmp == 0);
237 { /* "topkapı" */
238 static const uint16_t input[] =
239 { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 };
240 static const uint16_t casefolded[] =
241 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
242 int cmp;
244 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
245 ASSERT (cmp == -1);
247 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0);
248 ASSERT (cmp == 0);
251 /* Uppercasing can increase the number of Unicode characters. */
252 { /* "heiß" */
253 static const uint16_t input1[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
254 static const uint16_t input2[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 };
255 int cmp;
257 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
258 ASSERT (cmp == 0);
261 /* Case mappings for some characters can depend on the surrounding characters. */
262 { /* "περισσότερες πληροφορίες" */
263 static const uint16_t input1[] =
265 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
266 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
267 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
269 static const uint16_t input2[] =
271 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
272 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7,
273 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3
275 static const uint16_t input3[] =
277 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4,
278 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397,
279 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3
281 int cmp;
283 ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0);
284 ASSERT (cmp == 0);
286 ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
287 ASSERT (cmp == 0);
289 ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0);
290 ASSERT (cmp == 0);
293 /* Case mapping can require subsequent normalization. */
294 { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
295 static const uint16_t input[] = { 0x01F0, 0x0323 };
296 static const uint16_t casefolded[] = { 0x006A, 0x030C, 0x0323 };
297 static const uint16_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C };
298 int cmp;
300 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
301 ASSERT (cmp == 0);
303 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, NULL, &cmp) == 0);
304 ASSERT (cmp != 0);
306 ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, UNINORM_NFD, &cmp) == 0);
307 ASSERT (cmp == 0);
309 ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, UNINORM_NFD, &cmp) == 0);
310 ASSERT (cmp == 0);
315 main ()
317 test_ascii (u16_casecmp, UNINORM_NFD);
318 test_nonascii (u16_casecmp);
320 return 0;