1 /* Test the Unicode character name functions.
2 Copyright (C) 2000-2003, 2005, 2007, 2009-2021 Free Software Foundation,
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
27 /* The names according to the UnicodeData.txt file, modified to contain the
28 Hangul syllable names, as described in the Unicode 3.0 book. */
29 static const char * unicode_names
[0x110000];
31 /* Maximum entries in unicode_aliases. */
32 #define ALIASLEN 0x200
34 /* The aliases according to the NameAliases.txt file. */
41 static struct unicode_alias unicode_aliases
[ALIASLEN
];
42 static int aliases_count
;
44 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
47 fill_names (const char *unicodedata_filename
)
55 stream
= fopen (unicodedata_filename
, "r");
58 fprintf (stderr
, "error during fopen of '%s'\n", unicodedata_filename
);
62 while (fgets (line
, sizeof line
, stream
))
70 comment
= strchr (line
, '#');
73 if (line
[strspn (line
, " \t\r\n")] == '\0')
80 fprintf (stderr
, "short line in '%s':%d\n",
81 unicodedata_filename
, lineno
);
92 fprintf (stderr
, "short line in '%s':%d\n",
93 unicodedata_filename
, lineno
);
97 i
= strtoul (field0
, NULL
, 16);
100 fprintf (stderr
, "index too large\n");
103 unicode_names
[i
] = xstrdup (field1
);
105 if (ferror (stream
) || fclose (stream
))
107 fprintf (stderr
, "error reading from '%s'\n", unicodedata_filename
);
112 /* Stores in unicode_aliases[] the relevant contents of the NameAliases.txt
115 fill_aliases (const char *namealiases_filename
)
123 stream
= fopen (namealiases_filename
, "r");
126 fprintf (stderr
, "error during fopen of '%s'\n", namealiases_filename
);
130 while (fgets (line
, sizeof line
, stream
))
136 comment
= strchr (line
, '#');
139 if (line
[strspn (line
, " \t\r\n")] == '\0')
148 fprintf (stderr
, "short line in '%s':%d\n",
149 namealiases_filename
, lineno
);
158 fprintf (stderr
, "short line in '%s':%d\n",
159 namealiases_filename
, lineno
);
164 uc
= strtoul (field0
, NULL
, 16);
167 fprintf (stderr
, "index too large\n");
171 if (aliases_count
== ALIASLEN
)
173 fprintf (stderr
, "too many aliases\n");
176 unicode_aliases
[aliases_count
].name
= xstrdup (field1
);
177 unicode_aliases
[aliases_count
].uc
= uc
;
180 if (ferror (stream
) || fclose (stream
))
182 fprintf (stderr
, "error reading from '%s'\n", namealiases_filename
);
188 name_has_alias (unsigned int uc
)
191 for (i
= 0; i
< ALIASLEN
; i
++)
192 if (unicode_aliases
[i
].uc
== uc
)
197 /* Perform an exhaustive test of the unicode_character_name function. */
203 char buf
[UNINAME_MAX
];
205 for (i
= 0; i
< 0x11000; i
++)
207 char *result
= unicode_character_name (i
, buf
);
209 if (unicode_names
[i
] != NULL
)
213 fprintf (stderr
, "\\u%04X name lookup failed!\n", i
);
216 else if (strcmp (result
, unicode_names
[i
]) != 0)
218 fprintf (stderr
, "\\u%04X name lookup returned wrong name: %s\n",
227 fprintf (stderr
, "\\u%04X name lookup returned wrong name: %s\n",
234 for (i
= 0x110000; i
< 0x1000000; i
++)
236 char *result
= unicode_character_name (i
, buf
);
240 fprintf (stderr
, "\\u%04X name lookup returned wrong name: %s\n",
249 /* Perform a test of the unicode_name_character function. */
251 test_inverse_lookup ()
256 /* First, verify all valid character names are recognized. */
257 for (i
= 0; i
< 0x110000; i
++)
258 if (unicode_names
[i
] != NULL
)
260 unsigned int result
= unicode_name_character (unicode_names
[i
]);
263 if (result
== UNINAME_INVALID
)
264 fprintf (stderr
, "inverse name lookup of \"%s\" failed\n",
268 "inverse name lookup of \"%s\" returned 0x%04X\n",
269 unicode_names
[i
], result
);
274 /* Second, generate random but likely names and verify they are not
275 recognized unless really valid. */
276 for (i
= 0; i
< 10000; i
++)
281 unsigned int l1
, l2
, j1
, j2
;
282 char buf
[2*UNINAME_MAX
];
285 do i1
= ((rand () % 0x11) << 16)
286 + ((rand () & 0xff) << 8)
288 while (unicode_names
[i1
] == NULL
);
290 do i2
= ((rand () % 0x11) << 16)
291 + ((rand () & 0xff) << 8)
293 while (unicode_names
[i2
] == NULL
);
295 s1
= unicode_names
[i1
];
297 s2
= unicode_names
[i2
];
300 /* Concatenate a starting piece of s1 with an ending piece of s2. */
301 for (j1
= 1; j1
<= l1
; j1
++)
302 if (j1
== l1
|| s1
[j1
] == ' ')
303 for (j2
= 0; j2
< l2
; j2
++)
304 if (j2
== 0 || s2
[j2
-1] == ' ')
306 memcpy (buf
, s1
, j1
);
308 memcpy (buf
+ j1
+ 1, s2
+ j2
, l2
- j2
+ 1);
310 result
= unicode_name_character (buf
);
311 if (result
!= UNINAME_INVALID
312 && !name_has_alias (result
)
313 && !(unicode_names
[result
] != NULL
314 && strcmp (unicode_names
[result
], buf
) == 0))
317 "inverse name lookup of \"%s\" returned 0x%04X\n",
318 unicode_names
[i
], result
);
324 /* Third, some extreme case that used to loop. */
325 if (unicode_name_character ("A A") != UNINAME_INVALID
)
331 /* Perform a test of the unicode_name_character function for aliases. */
337 char buf
[UNINAME_MAX
];
339 /* Verify all valid character names are recognized. */
340 for (i
= 0; i
< ALIASLEN
; i
++)
341 if (unicode_aliases
[i
].uc
!= UNINAME_INVALID
342 /* Skip if the character has no canonical name (e.g. control
344 && unicode_character_name (unicode_aliases
[i
].uc
, buf
))
346 unsigned int result
= unicode_name_character (unicode_aliases
[i
].name
);
347 if (result
!= unicode_aliases
[i
].uc
)
349 if (result
== UNINAME_INVALID
)
350 fprintf (stderr
, "inverse name lookup of \"%s\" failed\n",
351 unicode_aliases
[i
].name
);
354 "inverse name lookup of \"%s\" returned 0x%04X\n",
355 unicode_aliases
[i
].name
, result
);
364 main (int argc
, char *argv
[])
369 for (i
= 1; i
< argc
&& strcmp (argv
[i
], "--") != 0; i
++)
370 fill_names (argv
[i
]);
375 for (j
= 0; j
< ALIASLEN
; j
++)
376 unicode_aliases
[j
].uc
= UNINAME_INVALID
;
379 for (; i
< argc
; i
++)
380 fill_aliases (argv
[i
]);
383 error
|= test_name_lookup ();
384 error
|= test_inverse_lookup ();
386 if (aliases_count
> 0)
387 error
|= test_alias_lookup ();