1 /* Test of Unicode compliance of normalization of UTF-32 strings.
2 Copyright (C) 2009-2018 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
22 #include "test-u32-normalize-big.h"
24 #if GNULIB_TEST_UNINORM_U32_NORMALIZE
33 #define ASSERT_WITH_LINE(expr, file, line) \
38 fprintf (stderr, "%s:%d: assertion failed for %s:%u\n", \
39 __FILE__, __LINE__, file, line); \
47 cmp_ucs4_t (const void *a
, const void *b
)
49 ucs4_t a_value
= *(const ucs4_t
*)a
;
50 ucs4_t b_value
= *(const ucs4_t
*)b
;
51 return (a_value
< b_value
? -1 : a_value
> b_value
? 1 : 0);
55 read_normalization_test_file (const char *filename
,
56 struct normalization_test_file
*file
)
61 struct normalization_test_line
*lines
;
63 size_t lines_allocated
;
65 stream
= fopen (filename
, "r");
68 fprintf (stderr
, "error during fopen of '%s'\n", filename
);
72 for (part_index
= 0; part_index
< 4; part_index
++)
74 file
->parts
[part_index
].lines
= NULL
;
75 file
->parts
[part_index
].lines_length
= 0;
90 struct normalization_test_line line
;
91 size_t sequence_index
;
100 if (c
== EOF
|| c
== '\n')
104 while (ptr
< buf
+ 1000);
109 /* Ignore empty lines and comment lines. */
110 if (buf
[0] == '\0' || buf
[0] == '#')
113 /* Handle lines that introduce a new part. */
116 /* Switch to the next part. */
120 (struct normalization_test_line
*)
121 xnrealloc (lines
, lines_length
, sizeof (struct normalization_test_line
));
122 file
->parts
[part_index
].lines
= lines
;
123 file
->parts
[part_index
].lines_length
= lines_length
;
132 /* It's a line containing 5 sequences of Unicode characters.
133 Parse it and append it to the current part. */
134 if (!(part_index
>= 0 && part_index
< 4))
136 fprintf (stderr
, "unexpected structure of '%s'\n", filename
);
140 line
.lineno
= lineno
;
141 for (sequence_index
= 0; sequence_index
< 5; sequence_index
++)
142 line
.sequences
[sequence_index
] = NULL
;
143 for (sequence_index
= 0; sequence_index
< 5; sequence_index
++)
145 uint32_t *sequence
= XNMALLOC (1, uint32_t);
146 size_t sequence_length
= 0;
153 uc
= strtoul (ptr
, &endptr
, 16);
158 /* Append uc to the sequence. */
161 xnrealloc (sequence
, sequence_length
+ 2, sizeof (uint32_t));
162 sequence
[sequence_length
] = uc
;
168 if (sequence_length
== 0)
170 fprintf (stderr
, "empty character sequence in '%s'\n", filename
);
173 sequence
[sequence_length
] = 0; /* terminator */
175 line
.sequences
[sequence_index
] = sequence
;
179 fprintf (stderr
, "error parsing '%s'\n", filename
);
185 /* Append the line to the current part. */
186 if (lines_length
== lines_allocated
)
188 lines_allocated
= 2 * lines_allocated
;
189 if (lines_allocated
< 7)
192 (struct normalization_test_line
*)
193 xnrealloc (lines
, lines_allocated
, sizeof (struct normalization_test_line
));
195 lines
[lines_length
] = line
;
202 (struct normalization_test_line
*)
203 xnrealloc (lines
, lines_length
, sizeof (struct normalization_test_line
));
204 file
->parts
[part_index
].lines
= lines
;
205 file
->parts
[part_index
].lines_length
= lines_length
;
209 /* Collect all c1 values from the part 1 in an array. */
210 const struct normalization_test_part
*p
= &file
->parts
[1];
211 ucs4_t
*c1_array
= XNMALLOC (p
->lines_length
+ 1, ucs4_t
);
214 for (line_index
= 0; line_index
< p
->lines_length
; line_index
++)
216 const uint32_t *sequence
= p
->lines
[line_index
].sequences
[0];
217 /* In part 1, every sequences[0] consists of a single character. */
218 if (!(sequence
[0] != 0 && sequence
[1] == 0))
220 c1_array
[line_index
] = sequence
[0];
223 /* Sort this array. */
224 qsort (c1_array
, p
->lines_length
, sizeof (ucs4_t
), cmp_ucs4_t
);
226 /* Add the sentinel at the end. */
227 c1_array
[p
->lines_length
] = 0x110000;
229 file
->part1_c1_sorted
= c1_array
;
232 file
->filename
= xstrdup (filename
);
234 if (ferror (stream
) || fclose (stream
))
236 fprintf (stderr
, "error reading from '%s'\n", filename
);
242 test_specific (const struct normalization_test_file
*file
,
243 int (*check
) (const uint32_t *c1
, size_t c1_length
,
244 const uint32_t *c2
, size_t c2_length
,
245 const uint32_t *c3
, size_t c3_length
,
246 const uint32_t *c4
, size_t c4_length
,
247 const uint32_t *c5
, size_t c5_length
))
251 for (part_index
= 0; part_index
< 4; part_index
++)
253 const struct normalization_test_part
*p
= &file
->parts
[part_index
];
256 for (line_index
= 0; line_index
< p
->lines_length
; line_index
++)
258 const struct normalization_test_line
*l
= &p
->lines
[line_index
];
260 ASSERT_WITH_LINE (check (l
->sequences
[0], u32_strlen (l
->sequences
[0]),
261 l
->sequences
[1], u32_strlen (l
->sequences
[1]),
262 l
->sequences
[2], u32_strlen (l
->sequences
[2]),
263 l
->sequences
[3], u32_strlen (l
->sequences
[3]),
264 l
->sequences
[4], u32_strlen (l
->sequences
[4]))
266 file
->filename
, l
->lineno
);
272 test_other (const struct normalization_test_file
*file
, uninorm_t nf
)
274 /* Check that for every character not listed in part 1 of the
275 NormalizationTest.txt file, the character maps to itself in each
276 of the four normalization forms. */
277 const ucs4_t
*p
= file
->part1_c1_sorted
;
280 for (uc
= 0; uc
< 0x110000; uc
++)
282 if (uc
>= 0xD800 && uc
< 0xE000)
284 /* A surrogate, not a character. Skip uc. */
298 result
= u32_normalize (nf
, input
, 1, NULL
, &length
);
299 ASSERT (result
!= NULL
&& length
== 1 && result
[0] == uc
);