1 /* Test of case and normalization insensitive comparison of UTF-16 strings.
2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
27 #include "test-casecmp.h"
/* Exercise a UTF-16 case-insensitive comparison function on non-ASCII data.

   MY_CASECMP is always called with two (array, element count) pairs, a
   language argument (NULL or "tr" here), a normalization argument (NULL or
   UNINORM_NFD here) and the address of cmp.  Every call in this function is
   asserted to return 0.

   NOTE(review): 0 is asserted even for pairs that clearly differ (for
   example input1 against input5 in the first group), so the return value
   presumably reports success and the ordering is delivered through cmp --
   confirm against the unicase.h documentation.
   NOTE(review): this text shows no declaration of cmp, no function or group
   braces, no closers for the multi-line array initializers and no checks on
   cmp.  They appear to have been lost and should be restored from the
   upstream file rather than guessed.  */
31 test_nonascii (int (*my_casecmp
) (const uint16_t *, size_t, const uint16_t *, size_t, const char *, uninorm_t
, int *))
33 /* Normalization effects. */
/* input1 and input3 contain the precomposed U+00F6; input2 and input4 spell
   it as 'O' followed by U+0308 (combining diaeresis) and use a capital 'L'.
   input3 and input4 carry one extra trailing 'n'; input5 is a different
   word.  */
35 static const uint16_t input1
[] = { 'H', 0x00F6, 'h', 'l', 'e' };
36 static const uint16_t input2
[] = { 'H', 'O', 0x0308, 'h', 'L', 'e' };
37 static const uint16_t input3
[] = { 'H', 0x00F6, 'h', 'l', 'e', 'n' };
38 static const uint16_t input4
[] = { 'H', 'O', 0x0308, 'h', 'L', 'e', 'n' };
39 static const uint16_t input5
[] = { 'H', 'u', 'r', 'z' };
/* Same word in composed and decomposed spelling, in both argument orders.  */
42 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
45 ASSERT (my_casecmp (input2
, SIZEOF (input2
), input1
, SIZEOF (input1
), NULL
, UNINORM_NFD
, &cmp
) == 0);
48 ASSERT (my_casecmp (input3
, SIZEOF (input3
), input4
, SIZEOF (input4
), NULL
, UNINORM_NFD
, &cmp
) == 0);
51 ASSERT (my_casecmp (input4
, SIZEOF (input4
), input3
, SIZEOF (input3
), NULL
, UNINORM_NFD
, &cmp
) == 0);
/* The shorter word against the one with the extra trailing 'n'.  */
54 ASSERT (my_casecmp (input2
, SIZEOF (input2
), input3
, SIZEOF (input3
), NULL
, UNINORM_NFD
, &cmp
) == 0);
57 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input4
, SIZEOF (input4
), NULL
, UNINORM_NFD
, &cmp
) == 0);
/* Both spellings against the different word input5.  */
60 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input5
, SIZEOF (input5
), NULL
, UNINORM_NFD
, &cmp
) == 0);
63 ASSERT (my_casecmp (input2
, SIZEOF (input2
), input5
, SIZEOF (input5
), NULL
, UNINORM_NFD
, &cmp
) == 0);
/* The next three groups each compare a single precomposed code point
   (input1) with a multi-unit spelling using combining marks (input2), under
   UNINORM_NFD.  */
66 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
67 static const uint16_t input1
[] = { 0x00C4 };
68 static const uint16_t input2
[] = { 0x0041, 0x0308 };
71 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
74 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
75 static const uint16_t input1
[] = { 0x01DE };
76 static const uint16_t input2
[] = { 0x0041, 0x0308, 0x0304 };
79 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
82 { /* GREEK DIALYTIKA AND PERISPOMENI */
83 static const uint16_t input1
[] = { 0x1FC1 };
84 static const uint16_t input2
[] = { 0x00A8, 0x0342 };
87 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
/* Hangul: input1 is one syllable code point, input2 is a syllable plus a
   trailing jamo (two units), input3 is three separate jamo.  */
90 { /* HANGUL SYLLABLE GEUL */
91 static const uint16_t input1
[] = { 0xAE00 };
92 static const uint16_t input2
[] = { 0xADF8, 0x11AF };
93 static const uint16_t input3
[] = { 0x1100, 0x1173, 0x11AF };
96 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
99 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input3
, SIZEOF (input3
), NULL
, UNINORM_NFD
, &cmp
) == 0);
102 { /* HANGUL SYLLABLE GEU */
103 static const uint16_t input1
[] = { 0xADF8 };
104 static const uint16_t input2
[] = { 0x1100, 0x1173 };
107 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
/* A longer mixed-script sentence in three spellings.  input2 lowercases the
   ASCII and Cyrillic capitals of input1 and replaces 0x00DF with
   0x0073 0x0073; input3 uses capitals throughout (0x00DC, 0x0053 0x0053,
   capital Cyrillic).  The CJK and Hangul units are identical in all three.
   Every pair is compared once with normalization NULL and once with
   UNINORM_NFD.  */
112 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
113 static const uint16_t input1
[] =
114 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
115 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
116 0x0439, 0x0442, 0x0435, '!', ' ',
117 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
118 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
119 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
121 static const uint16_t input2
[] =
122 { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ',
123 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
124 0x0439, 0x0442, 0x0435, '!', ' ',
125 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
126 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
127 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
129 static const uint16_t input3
[] =
130 { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ',
131 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423,
132 0x0419, 0x0422, 0x0415, '!', ' ',
133 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2,
134 '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ',
135 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
139 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, NULL
, &cmp
) == 0);
142 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
145 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input3
, SIZEOF (input3
), NULL
, NULL
, &cmp
) == 0);
148 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input3
, SIZEOF (input3
), NULL
, UNINORM_NFD
, &cmp
) == 0);
151 ASSERT (my_casecmp (input2
, SIZEOF (input2
), input3
, SIZEOF (input3
), NULL
, NULL
, &cmp
) == 0);
154 ASSERT (my_casecmp (input2
, SIZEOF (input2
), input3
, SIZEOF (input3
), NULL
, UNINORM_NFD
, &cmp
) == 0);
158 /* Case mapping can increase the number of Unicode characters. */
/* In the two groups below input1 is a single code point and the other arrays
   are multi-unit spellings; each pair is compared with normalization NULL
   and with UNINORM_NFD.  */
159 { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
160 static const uint16_t input1
[] = { 0x0149 };
161 static const uint16_t input2
[] = { 0x02BC, 0x006E };
162 static const uint16_t input3
[] = { 0x02BC, 0x004E };
165 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, NULL
, &cmp
) == 0);
168 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
171 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input3
, SIZEOF (input3
), NULL
, NULL
, &cmp
) == 0);
174 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input3
, SIZEOF (input3
), NULL
, UNINORM_NFD
, &cmp
) == 0);
177 { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */
178 static const uint16_t input1
[] = { 0x0390 };
179 static const uint16_t input2
[] = { 0x03B9, 0x0308, 0x0301 };
182 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, NULL
, &cmp
) == 0);
185 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, UNINORM_NFD
, &cmp
) == 0);
189 /* Turkish letters i İ ı I */
/* Each group below makes one call with language NULL and one call with
   language "tr"; normalization is NULL throughout.  Where two reference
   arrays exist, casefolded goes with NULL and casefolded_tr with "tr".  */
190 { /* LATIN CAPITAL LETTER I */
191 static const uint16_t input
[] = { 0x0049 };
192 static const uint16_t casefolded
[] = { 0x0069 };
193 static const uint16_t casefolded_tr
[] = { 0x0131 };
196 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), NULL
, NULL
, &cmp
) == 0);
199 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded_tr
, SIZEOF (casefolded_tr
), "tr", NULL
, &cmp
) == 0);
202 { /* LATIN SMALL LETTER I */
203 static const uint16_t input
[] = { 0x0069 };
204 static const uint16_t casefolded
[] = { 0x0049 };
205 static const uint16_t casefolded_tr
[] = { 0x0130 };
208 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), NULL
, NULL
, &cmp
) == 0);
211 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded_tr
, SIZEOF (casefolded_tr
), "tr", NULL
, &cmp
) == 0);
214 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
215 static const uint16_t input
[] = { 0x0130 };
216 static const uint16_t casefolded
[] = { 0x0069, 0x0307 };
217 static const uint16_t casefolded_tr
[] = { 0x0069 };
220 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), NULL
, NULL
, &cmp
) == 0);
223 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded_tr
, SIZEOF (casefolded_tr
), "tr", NULL
, &cmp
) == 0);
/* Only one reference array is defined for this letter; it is passed to both
   the NULL and the "tr" call.  */
226 { /* LATIN SMALL LETTER DOTLESS I */
227 static const uint16_t input
[] = { 0x0131 };
228 static const uint16_t casefolded
[] = { 0x0049 };
231 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), NULL
, NULL
, &cmp
) == 0);
234 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), "tr", NULL
, &cmp
) == 0);
/* A seven-letter capitalized word ending in U+0049 against a lowercase
   spelling that ends in U+0131 (dotless i); again one array pair used for
   both the NULL and the "tr" call.  */
238 static const uint16_t input
[] =
239 { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 };
240 static const uint16_t casefolded
[] =
241 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
244 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), NULL
, NULL
, &cmp
) == 0);
247 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), "tr", NULL
, &cmp
) == 0);
251 /* Uppercasing can increase the number of Unicode characters. */
/* input1 ends in U+00DF; input2 spells that ending as 0x0073 0x0073.  */
253 static const uint16_t input1
[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
254 static const uint16_t input2
[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 };
257 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, NULL
, &cmp
) == 0);
261 /* Case mappings for some characters can depend on the surrounding characters. */
262 { /* "περισσότερες πληροφορίες" */
/* The three arrays differ at the end of each word: input1 has U+03C2 there,
   input2 has U+03C3, and input3 is the capitalized spelling with U+03A3.
   All three pairings are compared with language and normalization NULL.  */
263 static const uint16_t input1
[] =
265 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
266 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
267 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
269 static const uint16_t input2
[] =
271 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
272 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7,
273 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3
275 static const uint16_t input3
[] =
277 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4,
278 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397,
279 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3
283 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input2
, SIZEOF (input2
), NULL
, NULL
, &cmp
) == 0);
286 ASSERT (my_casecmp (input1
, SIZEOF (input1
), input3
, SIZEOF (input3
), NULL
, NULL
, &cmp
) == 0);
289 ASSERT (my_casecmp (input2
, SIZEOF (input2
), input3
, SIZEOF (input3
), NULL
, NULL
, &cmp
) == 0);
293 /* Case mapping can require subsequent normalization. */
/* casefolded replaces U+01F0 by 0x006A 0x030C and keeps the trailing 0x0323;
   casefolded_decomposed holds the same three units with the two combining
   marks in the opposite order.  Both are compared against input without
   normalization (NULL) and with UNINORM_NFD.  */
294 { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
295 static const uint16_t input
[] = { 0x01F0, 0x0323 };
296 static const uint16_t casefolded
[] = { 0x006A, 0x030C, 0x0323 };
297 static const uint16_t casefolded_decomposed
[] = { 0x006A, 0x0323, 0x030C };
300 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), NULL
, NULL
, &cmp
) == 0);
303 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded_decomposed
, SIZEOF (casefolded_decomposed
), NULL
, NULL
, &cmp
) == 0);
306 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded
, SIZEOF (casefolded
), NULL
, UNINORM_NFD
, &cmp
) == 0);
309 ASSERT (my_casecmp (input
, SIZEOF (input
), casefolded_decomposed
, SIZEOF (casefolded_decomposed
), NULL
, UNINORM_NFD
, &cmp
) == 0);
/* Driver calls: pass u16_casecmp to test_ascii (with UNINORM_NFD) and to
   test_nonascii.  test_ascii is not defined in this text -- presumably it
   comes from test-casecmp.h; confirm.
   NOTE(review): the enclosing function header and braces are not visible
   here.  */
317 test_ascii (u16_casecmp
, UNINORM_NFD
);
318 test_nonascii (u16_casecmp
);