Add.
[libidn.git] / tests / tst_idna.c
bloba3ede2fd0ea5cfb3c6679e30c59d63ff10f79e02
/* tst_idna.c	Self tests for idna_to_ascii().
 * Copyright (C) 2002, 2003  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
22 #if HAVE_CONFIG_H
23 # include "config.h"
24 #endif
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <stdarg.h>
29 #include <string.h>
31 #include <stringprep.h>
32 #include <idna.h>
/* Nonzero enables verbose progress output on stdout.  */
static int debug = 0;
/* Number of failures reported through fail() so far.  */
static int error_count = 0;
/* Nonzero makes fail() terminate the process on the first failure.  */
static int break_on_error = 0;
38 static void
39 fail (const char *format, ...)
41 va_list arg_ptr;
43 va_start (arg_ptr, format);
44 vfprintf (stderr, format, arg_ptr);
45 va_end (arg_ptr);
46 error_count++;
47 if (break_on_error)
48 exit (1);
/* Dump LEN code points from STR on stdout as hexadecimal "U+xxxx" values.
 *
 * The dump starts with "\t;; ".  An extra space is printed after every
 * fourth value, and after every eighth value that is not the last one
 * the output continues on a fresh "\t;; " line.  The dump always ends
 * with a newline.
 */
static void
ucs4print (const uint32_t * str, size_t len)
{
  size_t i;

  printf ("\t;; ");
  for (i = 0; i < len; i++)
    {
      /* Hexadecimal conversion; the cast makes the argument type match
         the "%lx" specifier regardless of how uint32_t is defined.  */
      printf ("U+%04lx ", (unsigned long) str[i]);
      if ((i + 1) % 4 == 0)
        printf (" ");
      if ((i + 1) % 8 == 0 && i + 1 < len)
        printf ("\n\t;; ");
    }
  puts ("");
}
/* One IDNA self-test vector.  */
struct idna
{
  const char *name;     /* Description, printed in verbose mode.  */
  size_t inlen;         /* Number of code points used in IN.  */
  uint32_t in[100];     /* Input label as UCS-4 code points.  */
  const char *out;      /* Expected idna_to_ascii_4i() output; also used
                           as the input to idna_to_unicode_44i().  */
  int flags;            /* Flags passed to both conversion calls.  */
  int toasciirc;        /* Return code expected from idna_to_ascii_4i().  */
  int tounicoderc;      /* Return code expected from idna_to_unicode_44i().  */
};
79 const struct idna idna[] = {
81 "Arabic (Egyptian)", 17,
83 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
84 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
85 0x061F},
86 IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn", 0, IDNA_SUCCESS, IDNA_SUCCESS},
88 "Chinese (simplified)", 9,
90 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
91 IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye", 0, IDNA_SUCCESS, IDNA_SUCCESS},
93 "Chinese (traditional)", 9,
95 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
96 IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb", 0, IDNA_SUCCESS,
97 IDNA_SUCCESS},
99 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky", 22,
101 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
102 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
103 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
104 IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a", 0, IDNA_SUCCESS,
105 IDNA_SUCCESS},
107 "Hebrew", 22,
109 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
110 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
111 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
112 IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b", 0, IDNA_SUCCESS,
113 IDNA_SUCCESS},
115 "Hindi (Devanagari)", 30,
117 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
118 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
119 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
120 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
121 IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0,
122 IDNA_SUCCESS},
124 "Japanese (kanji and hiragana)", 18,
126 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
127 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
128 0x306E, 0x304B},
129 IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, IDNA_SUCCESS},
131 "Korean (Hangul syllables)", 24,
133 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
134 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
135 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
136 IDNA_ACE_PREFIX "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
137 "30a5jpsd879ccm6fea98c", 0, IDNA_PUNYCODE_ERROR, IDNA_PUNYCODE_ERROR},
138 /* too long output */
140 "Russian (Cyrillic)", 28,
142 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
143 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
144 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
145 0x0441, 0x0441, 0x043A, 0x0438},
146 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
147 IDNA_SUCCESS, IDNA_SUCCESS},
149 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol", 40,
151 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
152 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
153 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
154 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
155 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
156 IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a", 0,
157 IDNA_SUCCESS},
159 "Vietnamese", 31,
161 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
162 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
163 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
164 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
165 IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0,
166 IDNA_SUCCESS},
168 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]", 8,
170 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
171 IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b", 0, IDNA_SUCCESS,
172 IDNA_SUCCESS},
174 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS", 24,
176 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
177 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
178 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
179 IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0,
180 IDNA_SUCCESS},
182 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]", 25,
184 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
185 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
186 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
187 0x6240},
188 IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0,
189 IDNA_SUCCESS},
191 "Japanese [HITOTSU][YANE][NO][SHITA]2", 8,
193 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
194 IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v", 0, IDNA_SUCCESS, IDNA_SUCCESS},
196 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]", 13,
198 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
199 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
200 IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e", 0, IDNA_SUCCESS,
201 IDNA_SUCCESS},
203 "Japanese [PAFII]de[RUNBA]", 9,
205 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
206 IDNA_ACE_PREFIX "de-jg4avhby1noc0d", 0, IDNA_SUCCESS, IDNA_SUCCESS},
208 "Japanese [SONO][SUPIIDO][DE]", 7,
210 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
211 IDNA_ACE_PREFIX "d9juau41awczczp", 0, IDNA_SUCCESS, IDNA_SUCCESS},
213 "Greek", 8,
215 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
216 IDNA_ACE_PREFIX "hxargifdar", 0, IDNA_SUCCESS, IDNA_SUCCESS},
218 "Maltese (Malti)", 10,
220 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
221 0x0127, 0x0061},
222 IDNA_ACE_PREFIX "bonusaa-5bb1da", 0, IDNA_SUCCESS, IDNA_SUCCESS},
224 "Russian (Cyrillic)", 28,
226 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
227 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
228 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
229 0x0441, 0x0441, 0x043a, 0x0438},
230 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
231 IDNA_SUCCESS, IDNA_SUCCESS},
232 #if 0
234 "(S) -> $1.00 <-", 11,
236 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
237 0x0020, 0x003C, 0x002D},
238 IDNA_ACE_PREFIX "-> $1.00 <--", 0, IDNA_SUCCESS, IDNA_SUCCESS},
239 #endif
240 { /* XXX depends on IDNA_ACE_PREFIX */
241 "ToASCII() with ACE prefix", 4 + 3,
243 'x', 'n', '-', '-', 'f', 'o', 0x3067},
244 IDNA_ACE_PREFIX "too long too long too long too long too long too "
245 "long too long too long too long too long ", 0,
246 IDNA_CONTAINS_ACE_PREFIX, IDNA_PUNYCODE_ERROR}
250 main (int argc, char *argv[])
252 char label[100];
253 uint32_t *ucs4label = NULL;
254 uint32_t tmp[100];
255 size_t len, len2, i;
256 int rc;
259 if (strcmp (argv[argc - 1], "-v") == 0 ||
260 strcmp (argv[argc - 1], "--verbose") == 0)
261 debug = 1;
262 else if (strcmp (argv[argc - 1], "-b") == 0 ||
263 strcmp (argv[argc - 1], "--break-on-error") == 0)
264 break_on_error = 1;
265 else if (strcmp (argv[argc - 1], "-h") == 0 ||
266 strcmp (argv[argc - 1], "-?") == 0 ||
267 strcmp (argv[argc - 1], "--help") == 0)
269 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
270 argv[0]);
271 return 1;
273 while (argc-- > 1);
275 for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++)
277 if (debug)
278 printf ("IDNA entry %d: %s\n", i, idna[i].name);
280 if (debug)
282 printf ("in:\n");
283 ucs4print (idna[i].in, idna[i].inlen);
286 rc = idna_to_ascii_4i (idna[i].in, idna[i].inlen, label, idna[i].flags);
287 if (rc != idna[i].toasciirc)
289 fail ("IDNA entry %d failed: %d\n", i, rc);
290 if (debug)
291 printf ("FATAL\n");
292 continue;
295 if (debug && rc == IDNA_SUCCESS)
297 printf ("computed out: %s\n", label);
298 printf ("expected out: %s\n", idna[i].out);
300 else if (debug)
301 printf ("returned %d expected %d\n", rc, idna[i].toasciirc);
303 if (rc == IDNA_SUCCESS)
305 if (strlen (idna[i].out) != strlen (label) ||
306 strcasecmp (idna[i].out, label) != 0)
308 fail ("IDNA entry %d failed\n", i);
309 if (debug)
310 printf ("ERROR\n");
312 else if (debug)
313 printf ("OK\n");
315 else if (debug)
316 printf ("OK\n");
318 if (ucs4label)
319 free (ucs4label);
321 ucs4label = stringprep_utf8_to_ucs4 (idna[i].out, -1, &len);
323 if (debug)
325 printf ("in: %s (%d==%d)\n", idna[i].out, strlen (idna[i].out),
326 len);
327 ucs4print (ucs4label, len);
330 len2 = sizeof (tmp) / sizeof (tmp[0]);
331 rc = idna_to_unicode_44i (ucs4label, len, tmp, &len2, idna[i].flags);
332 if (debug)
334 printf ("expected out (%d):\n",
335 rc == IDNA_SUCCESS ? idna[i].inlen : len);
336 if (rc == IDNA_SUCCESS)
337 ucs4print (idna[i].in, idna[i].inlen);
338 else
339 ucs4print (ucs4label, len);
341 printf ("computed out (%d):\n", len2);
342 ucs4print (tmp, len2);
345 if (rc != idna[i].tounicoderc)
347 fail ("IDNA entry %d failed: %d\n", i, rc);
348 if (debug)
349 printf ("FATAL\n");
350 continue;
353 if ((rc == IDNA_SUCCESS && (len2 != idna[i].inlen ||
354 memcmp (idna[i].in, tmp, len2) != 0)) ||
355 (rc != IDNA_SUCCESS && (len2 != len ||
356 memcmp (ucs4label, tmp, len) != 0)))
358 if (debug)
360 if (rc == IDNA_SUCCESS)
361 printf ("len=%d len2=%d\n", len2, idna[i].inlen);
362 else
363 printf ("len=%d len2=%d\n", len, len2);
365 fail ("IDNA entry %d failed\n", i);
366 if (debug)
367 printf ("ERROR\n");
369 else if (debug)
370 printf ("OK\n\n");
373 if (ucs4label)
374 free (ucs4label);
376 if (debug)
377 printf ("IDNA self tests done with %d errors\n", error_count);
379 return error_count ? 1 : 0;