Fix.
[libidn.git] / tst_oidna.c
blobe0bb4a3a3f00911406795b52834ad44f865d0501
1 /* tst_idna.c Self tests for idna_to_ascii() and idna_to_unicode().
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "internal.h"
/* Test-run state shared by fail() and main(). */
static int debug = 0;		/* non-zero: print progress to stdout */
static int error_count = 0;	/* failures recorded by fail() so far */
static int break_on_error = 0;	/* non-zero: fail() exits immediately */

/* Report a test failure.

   FORMAT and the following arguments are handed to vfprintf() and
   written to stderr.  Every call bumps error_count; when
   break_on_error is set the process then terminates with status 1. */
static void
fail (const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  ++error_count;

  if (!break_on_error)
    return;

  exit (1);
}
/* Print LEN bytes of STR to stdout in a readable, quoted form.

   A header line with the byte count is written first.  ASCII letters,
   digits, space and '.' are printed as themselves; every other byte is
   printed as a \xHH escape.  Output is broken into quoted chunks of 16
   bytes, one chunk per line, each indented with a tab.  The final
   chunk is closed with a quote and a newline so the output is always
   balanced and line-terminated. */
static void
escapeprint (char *str, int len)
{
  int i;

  printf ("(length %d bytes):\n", len);
  printf ("\t'");
  for (i = 0; i < len; i++)
    {
      /* Go through unsigned char so bytes >= 0x80 are not
         sign-extended on platforms where plain char is signed. */
      unsigned char c = (unsigned char) str[i];

      if ((c >= 'A' && c <= 'Z')
	  || (c >= 'a' && c <= 'z')
	  || (c >= '0' && c <= '9')
	  || c == ' ' || c == '.')
	printf ("%c", c);
      else
	printf ("\\x%02X", (unsigned int) c);

      /* Start a new quoted chunk after every 16th byte, but only when
         more bytes follow. */
      if ((i + 1) % 16 == 0 && (i + 1) < len)
	printf ("'\n\t'");
    }
  /* Close the quote opened above (or by the last chunk break). */
  printf ("'\n");
}
/* Print the UCS-4 string STR to stdout as a list of code points.

   LEN is the number of elements to print; when LEN is negative, STR is
   treated as zero-terminated and printing stops at the first 0
   element.  Each code point is written as "U+XXXX" in hexadecimal
   (at least four digits, more for values above 0xFFFF).  An extra
   space follows every fourth item and a new ";; "-prefixed line is
   started after every eighth item when more items follow.  The output
   always ends with a newline. */
static void
ucs4print (unsigned long *str, ssize_t len)
{
  ssize_t i;

  printf ("\t;; ");
  for (i = 0; len >= 0 ? i < len : str[i] != 0; i++)
    {
      /* Whether another item follows this one.  In zero-terminated
         mode str[i] is known to be non-zero here, so str[i + 1] is
         still inside the string (it is at worst the terminator). */
      int more = len >= 0 ? i + 1 < len : str[i + 1] != 0;

      /* Code points are conventionally written in hexadecimal, and
         values beyond the BMP must not be truncated. */
      printf ("U+%04lX ", str[i]);
      if ((i + 1) % 4 == 0)
	printf (" ");
      if ((i + 1) % 8 == 0 && more)
	printf ("\n\t;; ");
    }
  puts ("");
}
79 struct idna
81 char *name;
82 size_t inlen;
83 unsigned long in[100];
84 char *out;
85 int allowunassigned;
86 int usestd3asciirules;
87 int toasciirc;
88 int tounicoderc;
89 } idna[] =
92 "Arabic (Egyptian)", 17,
94 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
95 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
96 0x061F},
97 IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn", 0, 0, IDNA_SUCCESS,
98 IDNA_SUCCESS},
100 "Chinese (simplified)", 9,
102 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
103 IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye", 0, 0, IDNA_SUCCESS,
104 IDNA_SUCCESS},
106 "Chinese (traditional)", 9,
108 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
109 IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb", 0, 0, IDNA_SUCCESS,
110 IDNA_SUCCESS},
112 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky", 22,
114 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
115 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
116 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
117 IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a", 0, 0, IDNA_SUCCESS,
118 IDNA_SUCCESS},
120 "Hebrew", 22,
122 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
123 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
124 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
125 IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b", 0, 0, IDNA_SUCCESS,
126 IDNA_SUCCESS},
128 "Hindi (Devanagari)", 30,
130 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
131 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
132 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
133 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
134 IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0,
135 IDNA_SUCCESS},
137 "Japanese (kanji and hiragana)", 18,
139 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
140 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
141 0x306E, 0x304B},
142 IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0,
143 IDNA_SUCCESS},
145 "Korean (Hangul syllables)", 24,
147 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
148 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
149 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
150 IDNA_ACE_PREFIX "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
151 "30a5jpsd879ccm6fea98c", 0, 0, IDNA_PUNYCODE_ERROR,
152 IDNA_PUNYCODE_ERROR},
153 /* too long output */
155 "Russian (Cyrillic)", 28,
157 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
158 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
159 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
160 0x0441, 0x0441, 0x043A, 0x0438},
161 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
162 IDNA_SUCCESS, IDNA_SUCCESS},
164 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol", 40,
166 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
167 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
168 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
169 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
170 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
171 IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0,
172 IDNA_SUCCESS},
174 "Vietnamese", 31,
176 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
177 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
178 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
179 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
180 IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0,
181 IDNA_SUCCESS},
183 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]", 8,
185 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
186 IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b", 0, 0, IDNA_SUCCESS,
187 IDNA_SUCCESS},
189 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS", 24,
191 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
192 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
193 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
194 IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0,
195 IDNA_SUCCESS},
197 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]", 25,
199 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
200 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
201 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
202 0x6240},
203 IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0,
204 IDNA_SUCCESS},
206 "Japanese [HITOTSU][YANE][NO][SHITA]2", 8,
208 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
209 IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v", 0, 0, IDNA_SUCCESS,
210 IDNA_SUCCESS},
212 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]", 13,
214 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
215 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
216 IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e", 0, 0, IDNA_SUCCESS,
217 IDNA_SUCCESS},
219 "Japanese [PAFII]de[RUNBA]", 9,
221 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
222 IDNA_ACE_PREFIX "de-jg4avhby1noc0d", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
224 "Japanese [SONO][SUPIIDO][DE]", 7,
226 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
227 IDNA_ACE_PREFIX "d9juau41awczczp", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
229 "Greek", 8,
231 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
232 IDNA_ACE_PREFIX "hxargifdar", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
234 "Maltese (Malti)", 10,
236 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
237 0x0127, 0x0061},
238 IDNA_ACE_PREFIX "bonusaa-5bb1da", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
240 "Russian (Cyrillic)", 28,
242 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
243 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
244 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
245 0x0441, 0x0441, 0x043a, 0x0438},
246 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
247 IDNA_SUCCESS, IDNA_SUCCESS},
248 #if 0
250 "(S) -> $1.00 <-", 11,
252 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
253 0x0020, 0x003C, 0x002D},
254 IDNA_ACE_PREFIX "-> $1.00 <--", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS},
255 #endif
256 { /* XXX depends on IDNA_ACE_PREFIX */
257 "ToASCII() with ACE prefix", 4 + 3,
259 'x', 'n', '-', '-', 'f', 'o', 0x3067},
260 IDNA_ACE_PREFIX "too long too long too long too long too long too "
261 "long too long too long too long too long ", 0, 0,
262 IDNA_CONTAINS_ACE_PREFIX, IDNA_PUNYCODE_ERROR}
266 main (int argc, char *argv[])
268 char label[100];
269 unsigned long *ucs4label = NULL;
270 unsigned long tmp[100];
271 size_t len, len2, i;
272 int rc;
275 if (strcmp (argv[argc - 1], "-v") == 0 ||
276 strcmp (argv[argc - 1], "--verbose") == 0)
277 debug = 1;
278 else if (strcmp (argv[argc - 1], "-b") == 0 ||
279 strcmp (argv[argc - 1], "--break-on-error") == 0)
280 break_on_error = 1;
281 else if (strcmp (argv[argc - 1], "-h") == 0 ||
282 strcmp (argv[argc - 1], "-?") == 0 ||
283 strcmp (argv[argc - 1], "--help") == 0)
285 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
286 argv[0]);
287 return 1;
289 while (argc-- > 1);
291 for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++)
293 if (debug)
294 printf ("IDNA entry %d: %s\n", i, idna[i].name);
296 if (debug)
298 printf ("in:\n");
299 ucs4print (idna[i].in, idna[i].inlen);
302 rc = idna_to_ascii (idna[i].in, idna[i].inlen, label,
303 idna[i].allowunassigned, idna[i].usestd3asciirules);
304 if (rc != idna[i].toasciirc)
306 fail ("IDNA entry %d failed: %d\n", i, rc);
307 if (debug)
308 printf ("FATAL\n");
309 continue;
312 if (debug && rc == IDNA_SUCCESS)
314 printf ("computed out: %s\n", label);
315 printf ("expected out: %s\n", idna[i].out);
317 else if (debug)
318 printf ("returned %d expected %d\n", rc, idna[i].toasciirc);
320 if (rc == IDNA_SUCCESS)
322 if (strlen (idna[i].out) != strlen (label) ||
323 strcasecmp (idna[i].out, label) != 0)
325 fail ("IDNA entry %d failed\n", i);
326 if (debug)
327 printf ("ERROR\n");
329 else if (debug)
330 printf ("OK\n");
332 else if (debug)
333 printf ("OK\n");
335 if (ucs4label)
336 free (ucs4label);
338 ucs4label = stringprep_utf8_to_ucs4 (idna[i].out, -1, &len);
340 if (debug)
342 printf ("in: %s (%d==%d)\n", idna[i].out, strlen (idna[i].out),
343 len);
344 ucs4print (ucs4label, len);
347 len2 = sizeof (tmp) / sizeof (tmp[0]);
348 rc = idna_to_unicode (ucs4label, len, tmp, &len2,
349 idna[i].allowunassigned,
350 idna[i].usestd3asciirules);
351 if (debug)
353 printf ("expected out:\n");
354 if (rc == IDNA_SUCCESS)
355 ucs4print (idna[i].in, idna[i].inlen);
356 else
357 ucs4print (ucs4label, len);
359 printf ("computed out:\n");
360 ucs4print (tmp, len2);
363 if (rc != idna[i].tounicoderc)
365 fail ("IDNA entry %d failed: %d\n", i, rc);
366 if (debug)
367 printf ("FATAL\n");
368 continue;
371 if ((rc == IDNA_SUCCESS && (len2 != idna[i].inlen ||
372 memcmp (idna[i].in, tmp, len2) != 0)) ||
373 (rc != IDNA_SUCCESS && (len2 != len ||
374 memcmp (ucs4label, tmp, len) != 0)))
376 fail ("IDNA entry %d failed\n", i);
377 if (debug)
378 printf ("ERROR\n");
380 else if (debug)
381 printf ("OK\n\n");
384 if (ucs4label)
385 free (ucs4label);
387 if (debug)
388 printf ("IDNA self tests done with %d errors\n", error_count);
390 return error_count ? 1 : 0;