Add.
[libidn.git] / tst_idna.c
blob065ee2fb0f3d49077cad8d57ef0b4a8ec30f131e
1 /* tst_idna.c Self tests for idna_to_ascii().
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "internal.h"
/* Set by -v/--verbose: print per-entry progress and results.  */
static int debug = 0;
/* Running total of failures reported through fail().  */
static int error_count = 0;
/* Set by -b/--break-on-error: fail() terminates the process at once.  */
static int break_on_error = 0;

/* Report a test failure.

   FORMAT and the following arguments are interpreted as by printf()
   and written to stderr.  The global error counter is bumped, and if
   break_on_error is set the program exits with status 1.  */
static void
fail (const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  error_count += 1;

  if (break_on_error != 0)
    {
      exit (1);
    }
}
/* Print the LEN bytes at STR to stdout in an escaped, line-wrapped form.

   A header line with the byte count is printed first.  ASCII letters,
   digits, space and '.' are emitted verbatim; every other byte is
   written as \xHH.  After every 16 bytes (except at the very end) the
   current quoted chunk is closed and a new one is opened on the next
   line.  The final chunk is left without a closing quote or newline.  */
static void
escapeprint (char *str, int len)
{
  int pos;

  printf ("(length %d bytes):\n", len);
  printf ("\t'");

  for (pos = 0; pos < len; pos++)
    {
      const int byte = str[pos] & 0xFF;
      const int is_upper = byte >= 'A' && byte <= 'Z';
      const int is_lower = byte >= 'a' && byte <= 'z';
      const int is_digit = byte >= '0' && byte <= '9';
      const int emitted = pos + 1;

      if (is_upper || is_lower || is_digit || byte == ' ' || byte == '.')
        {
          printf ("%c", byte);
        }
      else
        {
          printf ("\\x%02X", byte);
        }

      /* Start a fresh quoted chunk after each full group of 16 bytes.  */
      if (emitted % 16 == 0 && emitted < len)
        {
          printf("'\n\t'");
        }
    }
}
/* Print a UCS-4 string to stdout as a list of "U+XXXX" code points.

   STR is the array of code points.  If LEN is non-negative exactly LEN
   elements are printed; if LEN is negative STR is treated as
   zero-terminated.  Output starts with "\t;; ", puts an extra space
   after every fourth code point, and, when LEN is known, wraps onto a
   new "\t;; " line after every eighth one.  A newline ends the output.

   Code points are shown in full, in hexadecimal, as the "U+" notation
   implies; they are neither printed in decimal nor truncated to 16
   bits.  */
static void
ucs4print (unsigned long *str, ssize_t len)
{
  /* Same type as LEN so the comparisons below are not mixed-width.  */
  ssize_t i;

  printf ("\t;; ");
  for (i = 0; len >= 0 ? i < len : str[i] != 0; i++)
    {
      printf ("U+%04lX ", str[i]);
      if ((i + 1) % 4 == 0)
        printf (" ");
      /* With a negative LEN this is never true, so zero-terminated
         input is printed on a single line.  */
      if ((i + 1) % 8 == 0 && i + 1 < len)
        printf ("\n\t;; ");
    }
  puts ("");
}
79 struct idna
81 char *name;
82 size_t inlen;
83 unsigned long in[100];
84 char *out;
85 int allowunassigned;
86 int usestd3asciirules;
87 int toasciirc;
88 int tounicoderc;
89 } idna[] =
92 "Arabic (Egyptian)", 17,
94 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
95 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
96 0x061F},
97 IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn", 0, 0, IDNA_SUCCESS,
98 IDNA_SUCCESS},
100 "Chinese (simplified)", 9,
102 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
103 IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye", 0, 0, IDNA_SUCCESS,
104 IDNA_SUCCESS},
106 "Chinese (traditional)", 9,
108 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
109 IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb", 0, 0, IDNA_SUCCESS,
110 IDNA_SUCCESS},
112 "Czech"
113 #if !defined(DRAFT)
114 ": Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky"
115 #endif
116 , 22,
118 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
119 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
120 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
121 IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a", 0, 0, IDNA_SUCCESS,
122 IDNA_SUCCESS},
124 "Hebrew", 22,
126 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
127 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
128 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
129 IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b", 0, 0, IDNA_SUCCESS,
130 IDNA_SUCCESS},
132 "Hindi (Devanagari)", 30,
134 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
135 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
136 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
137 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
138 IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0,
139 IDNA_SUCCESS},
141 "Japanese (kanji and hiragana)", 18,
143 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
144 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
145 0x306E, 0x304B},
146 IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0,
147 IDNA_SUCCESS},
148 #if !defined(DRAFT)
150 "Korean (Hangul syllables)", 24,
152 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
153 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
154 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
155 IDNA_ACE_PREFIX "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
156 "30a5jpsd879ccm6fea98c", 0, 0, IDNA_PUNYCODE_ERROR,
157 IDNA_PUNYCODE_ERROR},
158 /* too long output */
159 #endif
161 "Russian (Cyrillic)", 28,
163 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
164 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
165 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
166 0x0441, 0x0441, 0x043A, 0x0438},
167 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
168 IDNA_SUCCESS, IDNA_SUCCESS},
170 "Spanish"
171 #if !defined(DRAFT)
172 ": Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol"
173 #endif
174 , 40,
176 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
177 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
178 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
179 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
180 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
181 IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0,
182 IDNA_SUCCESS},
184 "Vietnamese", 31,
186 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
187 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
188 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
189 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
190 IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0,
191 IDNA_SUCCESS},
193 "Japanese"
194 #if !defined(DRAFT)
195 "3[NEN]B[GUMI][KINPACHI][SENSEI]"
196 #endif
197 , 8,
199 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
200 IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b", 0, 0, IDNA_SUCCESS,
201 IDNA_SUCCESS},
203 "Japanese"
204 #if !defined(DRAFT)
205 "[AMURO][NAMIE]-with-SUPER-MONKEYS"
206 #endif
207 , 24,
209 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
210 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
211 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
212 IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0,
213 IDNA_SUCCESS},
215 "Japanese"
216 #if !defined(DRAFT)
217 "Hello-Another-Way-[SOREZORE][NO][BASHO]"
218 #endif
219 , 25,
221 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
222 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
223 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
224 0x6240},
225 IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0,
226 IDNA_SUCCESS},
228 "Japanese"
229 #if !defined(DRAFT)
230 "[HITOTSU][YANE][NO][SHITA]2"
231 #endif
232 , 8,
234 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
235 IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v", 0, 0, IDNA_SUCCESS,
236 IDNA_SUCCESS},
238 "Japanese"
239 #if !defined(DRAFT)
240 "Maji[DE]Koi[SURU]5[BYOU][MAE]"
241 #endif
242 , 13,
244 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
245 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
246 IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e", 0, 0, IDNA_SUCCESS,
247 IDNA_SUCCESS},
249 "Japanese"
250 #if !defined(DRAFT)
251 "[PAFII]de[RUNBA]"
252 #endif
253 , 9,
255 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
256 IDNA_ACE_PREFIX "de-jg4avhby1noc0d", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
258 "Japanese"
259 #if !defined(DRAFT)
260 "[SONO][SUPIIDO][DE]"
261 #endif
262 , 7,
264 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
265 IDNA_ACE_PREFIX "d9juau41awczczp", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
267 "Greek", 8,
268 {0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
269 IDNA_ACE_PREFIX "hxargifdar", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
271 "Maltese (Malti)", 10,
272 {0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
273 0x0127, 0x0061},
274 IDNA_ACE_PREFIX "bonusaa-5bb1da", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
276 "Russian (Cyrillic)", 28,
277 {0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
278 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
279 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
280 0x0441, 0x0441, 0x043a, 0x0438},
281 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
282 IDNA_SUCCESS, IDNA_SUCCESS},
283 #if 0
285 "(S) -> $1.00 <-", 11,
287 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
288 0x0020, 0x003C, 0x002D},
289 IDNA_ACE_PREFIX "-> $1.00 <--", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
290 #endif
291 #if !defined(DRAFT)
292 { /* XXX depends on IDNA_ACE_PREFIX */
293 "ToASCII() with ACE prefix", 4 + 3,
294 { 'x', 'n', '-', '-', 'f', 'o', 0x3067},
295 IDNA_ACE_PREFIX "too long too long too long too long too long too "
296 "long too long too long too long too long ", 0, 0,
297 IDNA_CONTAINS_ACE_PREFIX, IDNA_PUNYCODE_ERROR}
298 #endif
302 main (int argc, char *argv[])
304 char label[100];
305 unsigned long *ucs4label = NULL;
306 unsigned long tmp[100];
307 size_t len, len2, i;
308 int rc;
311 if (strcmp (argv[argc - 1], "-v") == 0 ||
312 strcmp (argv[argc - 1], "--verbose") == 0)
313 debug = 1;
314 else if (strcmp (argv[argc - 1], "-b") == 0 ||
315 strcmp (argv[argc - 1], "--break-on-error") == 0)
316 break_on_error = 1;
317 else if (strcmp (argv[argc - 1], "-h") == 0 ||
318 strcmp (argv[argc - 1], "-?") == 0 ||
319 strcmp (argv[argc - 1], "--help") == 0)
321 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
322 argv[0]);
323 return 1;
325 while (argc-- > 1);
327 for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++)
329 #ifdef DRAFT
330 printf("<section title=\"%s\">\n", idna[i].name);
331 printf("\n");
332 printf("<figure>\n");
333 printf("<artwork>\n");
334 { char *p;
335 int j;
336 size_t t;
337 p = stringprep_ucs4_to_utf8(idna[i].in, idna[i].inlen, NULL, &t);
338 printf ("in ");
339 escapeprint(p, strlen(p));
340 printf ("\n");
341 free(p);
343 printf ("input (length %d):\n\t", t);
344 for (j = 0; j < idna[i].inlen; j++)
346 printf ("U+%04lx ", idna[i].in[j]);
347 if ((j+1)%8 == 0)
348 printf("\n\t");
350 printf ("\n");
352 #endif
354 if (debug)
355 printf ("IDNA entry %d: %s\n", i, idna[i].name);
357 if (debug)
359 printf ("in:\n");
360 ucs4print (idna[i].in, idna[i].inlen);
363 rc = idna_to_ascii (idna[i].in, idna[i].inlen, label,
364 idna[i].allowunassigned, idna[i].usestd3asciirules);
365 if (rc != idna[i].toasciirc)
367 fail ("IDNA entry %d failed: %d\n", i, rc);
368 if (debug)
369 printf ("FATAL\n");
370 continue;
373 #ifdef DRAFT
374 if (rc == IDNA_SUCCESS)
375 printf ("\nout: %s\n", label);
376 #endif
378 if (debug && rc == IDNA_SUCCESS)
380 printf ("computed out: %s\n", label);
381 printf ("expected out: %s\n", idna[i].out);
383 else if (debug)
384 printf ("returned %d expected %d\n", rc, idna[i].toasciirc);
386 if (rc == IDNA_SUCCESS)
388 if (strlen (idna[i].out) != strlen (label) ||
389 strcasecmp (idna[i].out, label) != 0)
391 fail ("IDNA entry %d failed\n", i);
392 if (debug)
393 printf ("ERROR\n");
395 else if (debug)
396 printf ("OK\n");
398 else if (debug)
399 printf ("OK\n");
401 if (ucs4label)
402 free(ucs4label);
404 ucs4label = stringprep_utf8_to_ucs4 (idna[i].out, -1, &len);
406 if (debug)
408 printf ("in: %s (%d==%d)\n", idna[i].out, strlen (idna[i].out),
409 len);
410 ucs4print (ucs4label, len);
413 len2 = sizeof (tmp);
414 rc = idna_to_unicode (ucs4label, len, tmp, &len2,
415 idna[i].allowunassigned,
416 idna[i].usestd3asciirules);
417 if (debug)
419 printf ("expected out:\n");
420 if (rc == IDNA_SUCCESS)
421 ucs4print (idna[i].in, idna[i].inlen);
422 else
423 ucs4print (ucs4label, len);
425 printf ("computed out:\n");
426 ucs4print (tmp, len2);
429 if (rc != idna[i].tounicoderc)
431 fail ("IDNA entry %d failed: %d\n", i, rc);
432 if (debug)
433 printf ("FATAL\n");
434 continue;
437 if ((rc == IDNA_SUCCESS && (len2 != idna[i].inlen ||
438 memcmp (idna[i].in, tmp, len2) != 0)) ||
439 (rc != IDNA_SUCCESS && (len2 != len ||
440 memcmp (ucs4label, tmp, len) != 0)))
442 fail ("IDNA entry %d failed\n", i);
443 if (debug)
444 printf ("ERROR\n");
446 else if (debug)
447 printf ("OK\n\n");
449 #ifdef DRAFT
450 printf("</artwork>\n");
451 printf("</figure>\n");
452 printf("\n");
453 printf("</section>\n");
454 #endif
457 if (ucs4label)
458 free(ucs4label);
460 if (debug)
461 printf ("IDNA self tests done with %d errors\n", error_count);
463 return error_count ? 1 : 0;