truncate: Work around trailing slash bug in truncate() on AIX 7.2.
[gnulib.git] / tests / uniname / test-uninames.c
blobabc416a4a428300d5c6c5e60badbc11b31fa3e83
1 /* Test the Unicode character name functions.
2 Copyright (C) 2000-2003, 2005, 2007, 2009-2021 Free Software Foundation,
3 Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 #include <config.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
24 #include "xalloc.h"
25 #include "uniname.h"
/* Character names read from UnicodeData.txt-format files, indexed by code
   point.  An entry stays NULL when no name was loaded for that code point.
   The data file is expected to be modified to contain the Hangul syllable
   names, as described in the Unicode 3.0 book.  */
static const char *unicode_names[0x110000];

/* Capacity of the unicode_aliases[] table.  */
#define ALIASLEN 0x200

/* One entry of the NameAliases.txt file: an alias name and the code point
   it designates.  */
struct unicode_alias
{
  const char *name;
  unsigned int uc;
};

/* The aliases loaded so far; only the first aliases_count entries have been
   filled from a file.  */
static struct unicode_alias unicode_aliases[ALIASLEN];
static int aliases_count;
44 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
45 file. */
46 static void
47 fill_names (const char *unicodedata_filename)
49 FILE *stream;
50 char *field0;
51 char *field1;
52 char line[1024];
53 int lineno = 0;
55 stream = fopen (unicodedata_filename, "r");
56 if (stream == NULL)
58 fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
59 exit (EXIT_FAILURE);
62 while (fgets (line, sizeof line, stream))
64 char *p;
65 char *comment;
66 unsigned long i;
68 lineno++;
70 comment = strchr (line, '#');
71 if (comment != NULL)
72 *comment = '\0';
73 if (line[strspn (line, " \t\r\n")] == '\0')
74 continue;
76 field0 = p = line;
77 p = strchr (p, ';');
78 if (!p)
80 fprintf (stderr, "short line in '%s':%d\n",
81 unicodedata_filename, lineno);
82 exit (EXIT_FAILURE);
84 *p++ = '\0';
86 field1 = p;
87 if (*field1 == '<')
88 continue;
89 p = strchr (p, ';');
90 if (!p)
92 fprintf (stderr, "short line in '%s':%d\n",
93 unicodedata_filename, lineno);
94 exit (EXIT_FAILURE);
96 *p = '\0';
97 i = strtoul (field0, NULL, 16);
98 if (i >= 0x110000)
100 fprintf (stderr, "index too large\n");
101 exit (EXIT_FAILURE);
103 unicode_names[i] = xstrdup (field1);
105 if (ferror (stream) || fclose (stream))
107 fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
108 exit (1);
112 /* Stores in unicode_aliases[] the relevant contents of the NameAliases.txt
113 file. */
114 static void
115 fill_aliases (const char *namealiases_filename)
117 FILE *stream;
118 char *field0;
119 char *field1;
120 char line[1024];
121 int lineno = 0;
123 stream = fopen (namealiases_filename, "r");
124 if (stream == NULL)
126 fprintf (stderr, "error during fopen of '%s'\n", namealiases_filename);
127 exit (EXIT_FAILURE);
130 while (fgets (line, sizeof line, stream))
132 char *p;
133 char *comment;
134 unsigned long uc;
136 comment = strchr (line, '#');
137 if (comment != NULL)
138 *comment = '\0';
139 if (line[strspn (line, " \t\r\n")] == '\0')
140 continue;
142 lineno++;
144 field0 = p = line;
145 p = strchr (p, ';');
146 if (!p)
148 fprintf (stderr, "short line in '%s':%d\n",
149 namealiases_filename, lineno);
150 exit (EXIT_FAILURE);
152 *p++ = '\0';
154 field1 = p;
155 p = strchr (p, ';');
156 if (!p)
158 fprintf (stderr, "short line in '%s':%d\n",
159 namealiases_filename, lineno);
160 exit (EXIT_FAILURE);
162 *p = '\0';
164 uc = strtoul (field0, NULL, 16);
165 if (uc >= 0x110000)
167 fprintf (stderr, "index too large\n");
168 exit (EXIT_FAILURE);
171 if (aliases_count == ALIASLEN)
173 fprintf (stderr, "too many aliases\n");
174 exit (EXIT_FAILURE);
176 unicode_aliases[aliases_count].name = xstrdup (field1);
177 unicode_aliases[aliases_count].uc = uc;
178 aliases_count++;
180 if (ferror (stream) || fclose (stream))
182 fprintf (stderr, "error reading from '%s'\n", namealiases_filename);
183 exit (1);
187 static int
188 name_has_alias (unsigned int uc)
190 int i;
191 for (i = 0; i < ALIASLEN; i++)
192 if (unicode_aliases[i].uc == uc)
193 return 1;
194 return 0;
197 /* Perform an exhaustive test of the unicode_character_name function. */
198 static int
199 test_name_lookup ()
201 int error = 0;
202 unsigned int i;
203 char buf[UNINAME_MAX];
205 for (i = 0; i < 0x11000; i++)
207 char *result = unicode_character_name (i, buf);
209 if (unicode_names[i] != NULL)
211 if (result == NULL)
213 fprintf (stderr, "\\u%04X name lookup failed!\n", i);
214 error = 1;
216 else if (strcmp (result, unicode_names[i]) != 0)
218 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
219 i, result);
220 error = 1;
223 else
225 if (result != NULL)
227 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
228 i, result);
229 error = 1;
234 for (i = 0x110000; i < 0x1000000; i++)
236 char *result = unicode_character_name (i, buf);
238 if (result != NULL)
240 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
241 i, result);
242 error = 1;
246 return error;
249 /* Perform a test of the unicode_name_character function. */
250 static int
251 test_inverse_lookup ()
253 int error = 0;
254 unsigned int i;
256 /* First, verify all valid character names are recognized. */
257 for (i = 0; i < 0x110000; i++)
258 if (unicode_names[i] != NULL)
260 unsigned int result = unicode_name_character (unicode_names[i]);
261 if (result != i)
263 if (result == UNINAME_INVALID)
264 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
265 unicode_names[i]);
266 else
267 fprintf (stderr,
268 "inverse name lookup of \"%s\" returned 0x%04X\n",
269 unicode_names[i], result);
270 error = 1;
274 /* Second, generate random but likely names and verify they are not
275 recognized unless really valid. */
276 for (i = 0; i < 10000; i++)
278 unsigned int i1, i2;
279 const char *s1;
280 const char *s2;
281 unsigned int l1, l2, j1, j2;
282 char buf[2*UNINAME_MAX];
283 unsigned int result;
285 do i1 = ((rand () % 0x11) << 16)
286 + ((rand () & 0xff) << 8)
287 + (rand () & 0xff);
288 while (unicode_names[i1] == NULL);
290 do i2 = ((rand () % 0x11) << 16)
291 + ((rand () & 0xff) << 8)
292 + (rand () & 0xff);
293 while (unicode_names[i2] == NULL);
295 s1 = unicode_names[i1];
296 l1 = strlen (s1);
297 s2 = unicode_names[i2];
298 l2 = strlen (s2);
300 /* Concatenate a starting piece of s1 with an ending piece of s2. */
301 for (j1 = 1; j1 <= l1; j1++)
302 if (j1 == l1 || s1[j1] == ' ')
303 for (j2 = 0; j2 < l2; j2++)
304 if (j2 == 0 || s2[j2-1] == ' ')
306 memcpy (buf, s1, j1);
307 buf[j1] = ' ';
308 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
310 result = unicode_name_character (buf);
311 if (result != UNINAME_INVALID
312 && !name_has_alias (result)
313 && !(unicode_names[result] != NULL
314 && strcmp (unicode_names[result], buf) == 0))
316 fprintf (stderr,
317 "inverse name lookup of \"%s\" returned 0x%04X\n",
318 unicode_names[i], result);
319 error = 1;
324 /* Third, some extreme case that used to loop. */
325 if (unicode_name_character ("A A") != UNINAME_INVALID)
326 error = 1;
328 return error;
331 /* Perform a test of the unicode_name_character function for aliases. */
332 static int
333 test_alias_lookup ()
335 int error = 0;
336 unsigned int i;
337 char buf[UNINAME_MAX];
339 /* Verify all valid character names are recognized. */
340 for (i = 0; i < ALIASLEN; i++)
341 if (unicode_aliases[i].uc != UNINAME_INVALID
342 /* Skip if the character has no canonical name (e.g. control
343 characters). */
344 && unicode_character_name (unicode_aliases[i].uc, buf))
346 unsigned int result = unicode_name_character (unicode_aliases[i].name);
347 if (result != unicode_aliases[i].uc)
349 if (result == UNINAME_INVALID)
350 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
351 unicode_aliases[i].name);
352 else
353 fprintf (stderr,
354 "inverse name lookup of \"%s\" returned 0x%04X\n",
355 unicode_aliases[i].name, result);
356 error = 1;
360 return error;
364 main (int argc, char *argv[])
366 int error = 0;
367 int i;
369 for (i = 1; i < argc && strcmp (argv[i], "--") != 0; i++)
370 fill_names (argv[i]);
372 if (i < argc)
374 int j;
375 for (j = 0; j < ALIASLEN; j++)
376 unicode_aliases[j].uc = UNINAME_INVALID;
378 i++;
379 for (; i < argc; i++)
380 fill_aliases (argv[i]);
383 error |= test_name_lookup ();
384 error |= test_inverse_lookup ();
386 if (aliases_count > 0)
387 error |= test_alias_lookup ();
389 return error;