spawn-pipe tests: Fix test failure with MSVC.
[gnulib.git] / tests / uninorm / test-u8-nfc.c
blob d91c1d43d380dea87f89c8acbff32bba8b60edbf
1 /* Test of canonical normalization of UTF-8 strings.
2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
19 #include <config.h>
21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
23 #include "uninorm.h"
25 #include <signal.h>
26 #include <stdlib.h>
27 #include <unistd.h>
29 #include "unistr.h"
30 #include "macros.h"
32 static int
33 check (const uint8_t *input, size_t input_length,
34 const uint8_t *expected, size_t expected_length)
36 size_t length;
37 uint8_t *result;
39 /* Test return conventions with resultbuf == NULL. */
40 result = u8_normalize (UNINORM_NFC, input, input_length, NULL, &length);
41 if (!(result != NULL))
42 return 1;
43 if (!(length == expected_length))
44 return 2;
45 if (!(u8_cmp (result, expected, expected_length) == 0))
46 return 3;
47 free (result);
49 /* Test return conventions with resultbuf too small. */
50 if (expected_length > 0)
52 uint8_t *preallocated;
54 length = expected_length - 1;
55 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
56 result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
57 if (!(result != NULL))
58 return 4;
59 if (!(result != preallocated))
60 return 5;
61 if (!(length == expected_length))
62 return 6;
63 if (!(u8_cmp (result, expected, expected_length) == 0))
64 return 7;
65 free (result);
66 free (preallocated);
69 /* Test return conventions with resultbuf large enough. */
71 uint8_t *preallocated;
73 length = expected_length;
74 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
75 result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
76 if (!(result != NULL))
77 return 8;
78 if (!(preallocated == NULL || result == preallocated))
79 return 9;
80 if (!(length == expected_length))
81 return 10;
82 if (!(u8_cmp (result, expected, expected_length) == 0))
83 return 11;
84 free (preallocated);
87 return 0;
/* Store COUNT copies of the two-byte UTF-8 sequence 0xCC SECOND starting at
   DEST, and return the position just past the last byte written.  */
static uint8_t *
append_marks (uint8_t *dest, size_t count, uint8_t second)
{
  while (count > 0)
    {
      *dest++ = 0xCC;
      *dest++ = second;
      count--;
    }
  return dest;
}

/* Run check () on a series of UTF-8 strings, each paired with the NFC form
   it is expected to normalize to, followed by three passes over a long
   sequence of combining marks.  */
void
test_u8_nfc (void)
{
  /* Empty string.  */
  {
    ASSERT (check (NULL, 0, NULL, 0) == 0);
  }

  /* U+0020 SPACE */
  {
    static const uint8_t input[] = { 0x20 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS */
  {
    static const uint8_t input[] = { 0xC3, 0x84 };
    static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
  }

  /* U+01DE LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  {
    static const uint8_t input[] = { 0xC7, 0x9E };
    static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
  }

  /* U+212B ANGSTROM SIGN: the expected result differs from the input.  */
  {
    static const uint8_t input[] = { 0xE2, 0x84, 0xAB };
    static const uint8_t decomposed[] = { 0x41, 0xCC, 0x8A };
    static const uint8_t expected[] = { 0xC3, 0x85 };

    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
    ASSERT (check (expected, SIZEOF (expected), expected, SIZEOF (expected)) == 0);
  }

  /* U+1FC1 GREEK DIALYTIKA AND PERISPOMENI */
  {
    static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+2113 SCRIPT SMALL L */
  {
    static const uint8_t input[] = { 0xE2, 0x84, 0x93 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+00A0 NO-BREAK SPACE */
  {
    static const uint8_t input[] = { 0xC2, 0xA0 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FB6C ARABIC LETTER VEH INITIAL FORM */
  {
    static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FB6D ARABIC LETTER VEH MEDIAL FORM */
  {
    static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FB6B ARABIC LETTER VEH FINAL FORM */
  {
    static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FB6A ARABIC LETTER VEH ISOLATED FORM */
  {
    static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+246E CIRCLED NUMBER FIFTEEN */
  {
    static const uint8_t input[] = { 0xE2, 0x91, 0xAE };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+2122 TRADE MARK SIGN */
  {
    static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+1D62 LATIN SUBSCRIPT SMALL LETTER I */
  {
    static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FE35 PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
  {
    static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FF21 FULLWIDTH LATIN CAPITAL LETTER A */
  {
    static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FF64 HALFWIDTH IDEOGRAPHIC COMMA */
  {
    static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FE51 SMALL IDEOGRAPHIC COMMA */
  {
    static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+3392 SQUARE MHZ */
  {
    static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+215C VULGAR FRACTION THREE EIGHTHS */
  {
    static const uint8_t input[] = { 0xE2, 0x85, 0x9C };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+00B5 MICRO SIGN */
  {
    static const uint8_t input[] = { 0xC2, 0xB5 };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+FDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
  {
    static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+AE00 HANGUL SYLLABLE GEUL, also given as three conjoining jamo.  */
  {
    static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
    static const uint8_t decomposed[] =
      {
        0xE1, 0x84, 0x80,
        0xE1, 0x85, 0xB3,
        0xE1, 0x86, 0xAF
      };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
  }

  /* U+ADF8 HANGUL SYLLABLE GEU, also given as two conjoining jamo.  */
  {
    static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
    static const uint8_t decomposed[] =
      {
        0xE1, 0x84, 0x80,
        0xE1, 0x85, 0xB3
      };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
  }

  /* A mixed-script sentence:
     "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글"
     once precomposed and once with ü, й and the Hangul syllables
     decomposed.  */
  {
    static const uint8_t input[] =
      {
        'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
        ' ',
        0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81,
        0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
        0xD1, 0x82, 0xD0, 0xB5, '!', ' ',
        'x', '=', '(', '-', 'b', 0xC2, 0xB1,
        's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
        '/', '(', '2', 'a', ')', ' ', ' ',
        0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',',
        0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
        0xED, 0x95, 0x9C,
        0xEA, 0xB8, 0x80, '\n'
      };
    static const uint8_t decomposed[] =
      {
        'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
        ' ',
        0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81,
        0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
        0xD1, 0x82, 0xD0, 0xB5, '!', ' ',
        'x', '=', '(', '-', 'b', 0xC2, 0xB1,
        's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
        '/', '(', '2', 'a', ')', ' ', ' ',
        0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',',
        0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
        0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
        0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
      };

    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
  }

#if HAVE_DECL_ALARM
  /* Let the default SIGALRM action terminate the test if the remaining
     part takes more than 50 seconds.  */
  signal (SIGALRM, SIG_DFL);
  alarm (50);
#endif

  /* Check that the sorting is not O(n²) but O(n log n).
     The input is 'A' followed by m - 1 combining marks of two kinds
     (0xCC 0x99 and 0xCC 0x80), arranged differently in each pass.  */
  {
    int pass;

    for (pass = 0; pass < 3; pass++)
      {
        size_t repeat = 1;
        size_t m = 100000;
        /* One allocation holds the input (2 * m - 1 bytes) and, right
           after it, the expected output (2 * m - 2 bytes).  */
        uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
        uint8_t *expected;
        uint8_t *cursor;
        size_t n_tack;          /* number of 0xCC 0x99 marks */
        size_t n_grave;         /* number of 0xCC 0x80 marks */
        size_t k;

        /* Without memory this pass is silently skipped.  */
        if (input == NULL)
          continue;

        expected = input + (2 * m - 1);
        n_tack = m / 2;
        n_grave = (m - 1) / 2;
        /* NB: n_tack + n_grave == m - 1.  */

        input[0] = 0x41;
        cursor = input + 1;
        if (pass == 0)
          {
            /* All 0x99 marks first, then all 0x80 marks.  */
            cursor = append_marks (cursor, n_tack, 0x99);
            cursor = append_marks (cursor, n_grave, 0x80);
          }
        else if (pass == 1)
          {
            /* All 0x80 marks first, then all 0x99 marks.  */
            cursor = append_marks (cursor, n_grave, 0x80);
            cursor = append_marks (cursor, n_tack, 0x99);
          }
        else if (pass == 2)
          {
            /* The two marks alternate; the 0x99 marks that have no
               partner come last.  */
            for (k = 0; k < n_grave; k++)
              {
                cursor = append_marks (cursor, 1, 0x99);
                cursor = append_marks (cursor, 1, 0x80);
              }
            cursor = append_marks (cursor, n_tack - n_grave, 0x99);
          }
        else
          abort ();

        /* Expected: 0xC3 0x80 (the 'A' combined with one 0x80 mark), then
           every 0x99 mark, then the remaining 0x80 marks.  */
        expected[0] = 0xC3;
        expected[1] = 0x80;
        cursor = expected + 2;
        cursor = append_marks (cursor, n_tack, 0x99);
        cursor = append_marks (cursor, n_grave - 1, 0x80);

        while (repeat > 0)
          {
            ASSERT (check (input, 2 * m - 1, expected, 2 * m - 2) == 0);
            ASSERT (check (expected, 2 * m - 2, expected, 2 * m - 2) == 0);
            repeat--;
          }

        free (input);
      }
  }
}
355 #else
/* Fallback for builds where GNULIB_TEST_UNINORM_U8_NORMALIZE is not set:
   the entry point still exists but has nothing to check.  */
void
test_u8_nfc (void)
{
}
362 #endif