/* Retrieved from the libidn.git web view: tst_idna.c,
 * blob 268892cc10008c39825ef51bf91109556ab3b990
 * (page heading: "Change unsigned long to uint32_t.").
 */
/* tst_idna.c	Self tests for idna_to_ascii().
 * Copyright (C) 2002, 2003  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
#include "internal.h"

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Set by -v / --verbose: print per-entry diagnostics while testing.  */
static int debug = 0;
/* Number of failures reported through fail() so far.  */
static int error_count = 0;
/* Set by -b / --break-on-error: terminate on the first failure.  */
static int break_on_error = 0;

/* Report one test failure.

   FORMAT and the following arguments are interpreted as by printf and
   written to stderr.  The global error_count is incremented, and when
   break_on_error is set the process exits immediately with status 1.  */
static void
fail (const char *format, ...)
{
  va_list arg_ptr;

  va_start (arg_ptr, format);
  vfprintf (stderr, format, arg_ptr);
  va_end (arg_ptr);

  error_count++;
  if (break_on_error)
    exit (1);
}
/* Print the LEN bytes at STR to stdout in a readable, escaped form.

   A header line with the byte count is printed first.  ASCII letters,
   digits, space and '.' are printed as-is; every other byte is printed
   as \xHH.  The data is shown between single quotes, 16 bytes per
   line, each line starting with a tab.  */
static void
escapeprint (char *str, int len)
{
  int i;

  printf ("(length %d bytes):\n", len);
  printf ("\t'");
  for (i = 0; i < len; i++)
    {
      /* Mask to 0..255 so bytes >= 0x80 are not sign-extended.  */
      int c = str[i] & 0xFF;

      if ((c >= 'A' && c <= 'Z')
	  || (c >= 'a' && c <= 'z')
	  || (c >= '0' && c <= '9')
	  || c == ' ' || c == '.')
	printf ("%c", c);
      else
	printf ("\\x%02X", c);

      /* Start a new quoted line after every 16 bytes, except at the end.  */
      if ((i + 1) % 16 == 0 && (i + 1) < len)
	printf ("'\n\t'");
    }
  /* Close the quote opened above and terminate the line.  */
  printf ("'\n");
}
/* Print the code points in STR to stdout as "U+XXXX" tokens.

   If LEN is non-negative exactly LEN elements are printed; if LEN is
   negative, printing stops at the first zero element.  Output starts
   with "\t;; ", an extra space is inserted after every fourth value,
   and (only when LEN is non-negative) a new ";; " line is started
   after every eighth value.  A final newline is always written.

   Values are printed in hexadecimal, the conventional U+ notation,
   and are not truncated, so code points above U+FFFF show in full.  */
static void
ucs4print (unsigned long *str, ssize_t len)
{
  ssize_t i;

  printf ("\t;; ");
  for (i = 0; len >= 0 ? i < len : str[i] != 0; i++)
    {
      printf ("U+%04lX ", str[i]);
      if ((i + 1) % 4 == 0)
	printf (" ");
      if ((i + 1) % 8 == 0 && i + 1 < len)
	printf ("\n\t;; ");
    }
  puts ("");
}
79 struct idna
81 char *name;
82 size_t inlen;
83 unsigned long in[100];
84 char *out;
85 int allowunassigned;
86 int usestd3asciirules;
87 int toasciirc;
88 int tounicoderc;
89 } idna[] =
92 "Arabic (Egyptian)", 17,
94 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
95 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
96 0x061F},
97 IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn", 0, 0, IDNA_SUCCESS,
98 IDNA_SUCCESS},
100 "Chinese (simplified)", 9,
102 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
103 IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye", 0, 0, IDNA_SUCCESS,
104 IDNA_SUCCESS},
106 "Chinese (traditional)", 9,
108 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
109 IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb", 0, 0, IDNA_SUCCESS,
110 IDNA_SUCCESS},
112 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky"
113 , 22,
115 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
116 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
117 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
118 IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a", 0, 0, IDNA_SUCCESS,
119 IDNA_SUCCESS},
121 "Hebrew", 22,
123 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
124 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
125 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
126 IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b", 0, 0, IDNA_SUCCESS,
127 IDNA_SUCCESS},
129 "Hindi (Devanagari)", 30,
131 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
132 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
133 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
134 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
135 IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0,
136 IDNA_SUCCESS},
138 "Japanese (kanji and hiragana)", 18,
140 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
141 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
142 0x306E, 0x304B},
143 IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0,
144 IDNA_SUCCESS},
146 "Korean (Hangul syllables)", 24,
148 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
149 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
150 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
151 IDNA_ACE_PREFIX "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
152 "30a5jpsd879ccm6fea98c", 0, 0, IDNA_PUNYCODE_ERROR,
153 IDNA_PUNYCODE_ERROR},
154 /* too long output */
156 "Russian (Cyrillic)", 28,
158 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
159 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
160 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
161 0x0441, 0x0441, 0x043A, 0x0438},
162 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
163 IDNA_SUCCESS, IDNA_SUCCESS},
165 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol"
166 , 40,
168 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
169 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
170 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
171 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
172 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
173 IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0,
174 IDNA_SUCCESS},
176 "Vietnamese", 31,
178 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
179 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
180 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
181 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
182 IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0,
183 IDNA_SUCCESS},
185 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]"
186 , 8,
188 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
189 IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b", 0, 0, IDNA_SUCCESS,
190 IDNA_SUCCESS},
192 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS"
193 , 24,
195 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
196 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
197 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
198 IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0,
199 IDNA_SUCCESS},
201 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]"
202 , 25,
204 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
205 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
206 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
207 0x6240},
208 IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0,
209 IDNA_SUCCESS},
211 "Japanese [HITOTSU][YANE][NO][SHITA]2"
212 , 8,
214 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
215 IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v", 0, 0, IDNA_SUCCESS,
216 IDNA_SUCCESS},
218 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]"
219 , 13,
221 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
222 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
223 IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e", 0, 0, IDNA_SUCCESS,
224 IDNA_SUCCESS},
226 "Japanese [PAFII]de[RUNBA]"
227 , 9,
229 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
230 IDNA_ACE_PREFIX "de-jg4avhby1noc0d", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
232 "Japanese [SONO][SUPIIDO][DE]"
233 , 7,
235 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
236 IDNA_ACE_PREFIX "d9juau41awczczp", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
238 "Greek", 8,
239 {0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
240 IDNA_ACE_PREFIX "hxargifdar", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
242 "Maltese (Malti)", 10,
243 {0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
244 0x0127, 0x0061},
245 IDNA_ACE_PREFIX "bonusaa-5bb1da", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
247 "Russian (Cyrillic)", 28,
248 {0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
249 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
250 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
251 0x0441, 0x0441, 0x043a, 0x0438},
252 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
253 IDNA_SUCCESS, IDNA_SUCCESS},
254 #if 0
256 "(S) -> $1.00 <-", 11,
258 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
259 0x0020, 0x003C, 0x002D},
260 IDNA_ACE_PREFIX "-> $1.00 <--", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
261 #endif
262 { /* XXX depends on IDNA_ACE_PREFIX */
263 "ToASCII() with ACE prefix", 4 + 3,
264 { 'x', 'n', '-', '-', 'f', 'o', 0x3067},
265 IDNA_ACE_PREFIX "too long too long too long too long too long too "
266 "long too long too long too long too long ", 0, 0,
267 IDNA_CONTAINS_ACE_PREFIX, IDNA_PUNYCODE_ERROR}
271 main (int argc, char *argv[])
273 char label[100];
274 unsigned long *ucs4label = NULL;
275 unsigned long tmp[100];
276 size_t len, len2, i;
277 int rc;
280 if (strcmp (argv[argc - 1], "-v") == 0 ||
281 strcmp (argv[argc - 1], "--verbose") == 0)
282 debug = 1;
283 else if (strcmp (argv[argc - 1], "-b") == 0 ||
284 strcmp (argv[argc - 1], "--break-on-error") == 0)
285 break_on_error = 1;
286 else if (strcmp (argv[argc - 1], "-h") == 0 ||
287 strcmp (argv[argc - 1], "-?") == 0 ||
288 strcmp (argv[argc - 1], "--help") == 0)
290 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
291 argv[0]);
292 return 1;
294 while (argc-- > 1);
296 for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++)
298 if (debug)
299 printf ("IDNA entry %d: %s\n", i, idna[i].name);
301 if (debug)
303 printf ("in:\n");
304 ucs4print (idna[i].in, idna[i].inlen);
307 rc = idna_to_ascii (idna[i].in, idna[i].inlen, label,
308 idna[i].allowunassigned, idna[i].usestd3asciirules);
309 if (rc != idna[i].toasciirc)
311 fail ("IDNA entry %d failed: %d\n", i, rc);
312 if (debug)
313 printf ("FATAL\n");
314 continue;
317 if (debug && rc == IDNA_SUCCESS)
319 printf ("computed out: %s\n", label);
320 printf ("expected out: %s\n", idna[i].out);
322 else if (debug)
323 printf ("returned %d expected %d\n", rc, idna[i].toasciirc);
325 if (rc == IDNA_SUCCESS)
327 if (strlen (idna[i].out) != strlen (label) ||
328 strcasecmp (idna[i].out, label) != 0)
330 fail ("IDNA entry %d failed\n", i);
331 if (debug)
332 printf ("ERROR\n");
334 else if (debug)
335 printf ("OK\n");
337 else if (debug)
338 printf ("OK\n");
340 if (ucs4label)
341 free(ucs4label);
343 ucs4label = stringprep_utf8_to_ucs4 (idna[i].out, -1, &len);
345 if (debug)
347 printf ("in: %s (%d==%d)\n", idna[i].out, strlen (idna[i].out),
348 len);
349 ucs4print (ucs4label, len);
352 len2 = sizeof (tmp);
353 rc = idna_to_unicode (ucs4label, len, tmp, &len2,
354 idna[i].allowunassigned,
355 idna[i].usestd3asciirules);
356 if (debug)
358 printf ("expected out:\n");
359 if (rc == IDNA_SUCCESS)
360 ucs4print (idna[i].in, idna[i].inlen);
361 else
362 ucs4print (ucs4label, len);
364 printf ("computed out:\n");
365 ucs4print (tmp, len2);
368 if (rc != idna[i].tounicoderc)
370 fail ("IDNA entry %d failed: %d\n", i, rc);
371 if (debug)
372 printf ("FATAL\n");
373 continue;
376 if ((rc == IDNA_SUCCESS && (len2 != idna[i].inlen ||
377 memcmp (idna[i].in, tmp, len2) != 0)) ||
378 (rc != IDNA_SUCCESS && (len2 != len ||
379 memcmp (ucs4label, tmp, len) != 0)))
381 fail ("IDNA entry %d failed\n", i);
382 if (debug)
383 printf ("ERROR\n");
385 else if (debug)
386 printf ("OK\n\n");
389 if (ucs4label)
390 free(ucs4label);
392 if (debug)
393 printf ("IDNA self tests done with %d errors\n", error_count);
395 return error_count ? 1 : 0;