*** empty log message ***
[libidn.git] / tst_idna.c
blob2ad48592f08b672de4f83d78cbd3148d6aae1a0e
/* tst_idna.c	Libstringprep self tests for idna_to_ascii() and
 *		idna_to_unicode().
 * Copyright (C) 2002  Simon Josefsson
 *
 * This file is part of Libstringprep.
 *
 * Libstringprep is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Libstringprep is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Libstringprep; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
#include "internal.h"

#include <stdlib.h>
/* Non-zero enables verbose progress output (set by -v / --verbose).  */
static int debug = 0;
/* Number of failures reported through fail() so far.  */
static int error_count = 0;
/* Non-zero makes the first failure terminate the program
   (set by -b / --break-on-error).  */
static int break_on_error = 0;

/* Report a test failure.

   FORMAT and the following arguments are interpreted as for printf()
   and the resulting message is written to stderr.  The global
   error_count is incremented; when break_on_error is set the process
   exits immediately with status 1.  */
static void
fail (const char *format, ...)
{
  va_list arg_ptr;

  va_start (arg_ptr, format);
  vfprintf (stderr, format, arg_ptr);
  va_end (arg_ptr);
  error_count++;
  if (break_on_error)
    exit (1);
}
/* Dump a UCS-4 string to stdout as a sequence of "U+xxxx" code points.

   STR is the array of code points.  When LEN is non-negative exactly
   LEN elements are printed; when LEN is negative, elements are printed
   up to (not including) the first zero element.

   The output starts with "\t;; ", gets an extra space after every
   fourth code point, and is wrapped onto a new "\t;; " line after
   every eighth code point as long as more elements follow (the wrap
   only happens when LEN is given, since it is tested against LEN).
   A final newline is always written.  */
static void
ucs4print (unsigned long *str, ssize_t len)
{
  int i;

  printf ("\t;; ");
  for (i = 0; len >= 0 ? i < len : str[i]; i++)
    {
      /* Hexadecimal, at least four digits.  The previous format
         "%04lux" printed the value in decimal followed by a literal
         'x'.  */
      printf ("U+%04lx ", str[i]);
      if ((i + 1) % 4 == 0)
        printf (" ");
      if ((i + 1) % 8 == 0 && i + 1 < len)
        printf ("\n\t;; ");
    }
  puts ("");
}
58 struct idna
60 char *name;
61 size_t inlen;
62 unsigned long in[100];
63 char *out;
64 int allowunassigned;
65 int usestd3asciirules;
66 int toasciirc;
67 int tounicoderc;
69 idna[] =
72 "(A) Arabic (Egyptian)", 17,
74 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
75 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
76 0x061F}
77 , IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
80 "(B) Chinese (simplified)", 9,
82 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587}
83 , IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
86 "(C) Chinese (traditional)", 9,
88 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587}
89 , IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
92 "(D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky", 22,
94 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
95 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
96 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079}
97 , IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
100 "(E) Hebrew:", 22,
102 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
103 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
104 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA}
105 , IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
108 "(F) Hindi (Devanagari):", 30,
110 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
111 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
112 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
113 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902}
114 , IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0,
115 IDNA_SUCCESS}
118 "(G) Japanese (kanji and hiragana):", 18,
120 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
121 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
122 0x306E, 0x304B}
123 , IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0,
124 IDNA_SUCCESS}
127 "(H) Korean (Hangul syllables):", 24,
129 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
130 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
131 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C}
132 , IDNA_ACE_PREFIX "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
133 "30a5jpsd879ccm6fea98c", 0, 0, IDNA_PUNYCODE_ERROR, IDNA_PUNYCODE_ERROR}
134 /* too long output */
137 "(I) Russian (Cyrillic):", 28,
139 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
140 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
141 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
142 0x0441, 0x0441, 0x043A, 0x0438}
143 , IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
146 "(J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol", 40,
148 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
149 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
150 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
151 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
152 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C}
153 , IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0,
154 IDNA_SUCCESS}
157 "(K) Vietnamese:", 31,
159 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
160 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
161 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
162 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074}
163 , IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0,
164 IDNA_SUCCESS}
167 "(L) 3<nen>B<gumi><kinpachi><sensei>", 8,
169 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F}
170 , IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
173 "(M) <amuro><namie>-with-SUPER-MONKEYS", 24,
175 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
176 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
177 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053}
178 , IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0,
179 IDNA_SUCCESS}
182 "(N) Hello-Another-Way-<sorezore><no><basho>", 25,
184 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
185 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
186 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
187 0x6240}
188 , IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0,
189 IDNA_SUCCESS}
192 "(O) <hitotsu><yane><no><shita>2", 8,
194 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032}
195 , IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
198 "(P) Maji<de>Koi<suru>5<byou><mae>", 13,
200 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
201 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D}
202 , IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
205 "(Q) <pafii>de<runba>", 9,
207 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0}
208 , IDNA_ACE_PREFIX "de-jg4avhby1noc0d", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
211 "(R) <sono><supiido><de>", 7,
213 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067}
214 , IDNA_ACE_PREFIX "d9juau41awczczp", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
216 #if 0
218 "(S) -> $1.00 <-", 11,
220 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
221 0x0020, 0x003C, 0x002D}
222 , IDNA_ACE_PREFIX "-> $1.00 <--", 0, 0, IDNA_SUCCESS, IDNA_SUCCESS}
223 #endif
227 main (int argc, char *argv[])
229 char label[100];
230 unsigned long *ucs4label;
231 unsigned long tmp[100];
232 ssize_t len;
233 size_t len2;
234 int rc, i;
237 if (strcmp (argv[argc - 1], "-v") == 0 ||
238 strcmp (argv[argc - 1], "--verbose") == 0)
239 debug = 1;
240 else if (strcmp (argv[argc - 1], "-b") == 0 ||
241 strcmp (argv[argc - 1], "--break-on-error") == 0)
242 break_on_error = 1;
243 else if (strcmp (argv[argc - 1], "-h") == 0 ||
244 strcmp (argv[argc - 1], "-?") == 0 ||
245 strcmp (argv[argc - 1], "--help") == 0)
247 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
248 argv[0]);
249 return 1;
251 while (argc-- > 1);
253 for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++)
255 if (debug)
256 printf ("IDNA entry %d: %s\n", i, idna[i].name);
258 if (debug)
260 printf ("in:\n");
261 ucs4print (idna[i].in, idna[i].inlen);
264 rc = idna_to_ascii (idna[i].in, idna[i].inlen, label,
265 idna[i].allowunassigned, idna[i].usestd3asciirules);
266 if (rc != idna[i].toasciirc)
268 fail ("IDNA entry %d failed: %d\n", i, rc);
269 if (debug)
270 printf ("FATAL\n");
271 continue;
274 if (debug && rc == IDNA_SUCCESS)
276 printf ("computed out: %s\n", label);
277 printf ("expected out: %s\n", idna[i].out);
279 else if (debug)
280 printf ("returned %d expected %d\n", rc, idna[i].toasciirc);
282 if (rc == IDNA_SUCCESS)
284 if (strlen (idna[i].out) != strlen (label) ||
285 strcasecmp (idna[i].out, label) != 0)
287 fail ("IDNA entry %d failed\n", i);
288 if (debug)
289 printf ("ERROR\n");
291 else if (debug)
292 printf ("OK\n\n");
294 else if (debug)
295 printf ("OK\n\n");
297 ucs4label = stringprep_utf8_to_ucs4 (idna[i].out, -1, &len);
299 if (debug)
301 printf ("in: %s (%d==%d)\n", idna[i].out, strlen(idna[i].out), len);
302 ucs4print (ucs4label, len);
305 len2 = sizeof(tmp);
306 rc = idna_to_unicode (ucs4label, len, tmp, &len2,
307 idna[i].allowunassigned,
308 idna[i].usestd3asciirules);
309 if (rc != idna[i].tounicoderc)
311 fail ("IDNA entry %d failed: %d\n", i, rc);
312 if (debug)
313 printf ("FATAL\n");
314 continue;
317 if (debug)
319 printf ("expected out:\n");
320 if (rc == IDNA_SUCCESS)
321 ucs4print (idna[i].in, idna[i].inlen);
322 else
323 ucs4print (ucs4label, len);
325 printf ("computed out:\n");
326 ucs4print (tmp, len2);
329 if ((rc == IDNA_SUCCESS && (len2 != idna[i].inlen ||
330 memcmp(idna[i].in, tmp, len2) != 0)) ||
331 (rc != IDNA_SUCCESS && (len2 != len ||
332 memcmp(ucs4label, tmp, len) != 0)))
334 fail ("IDNA entry %d failed\n", i);
335 if (debug)
336 printf ("ERROR\n");
341 if (debug)
342 printf ("IDNA self tests done with %d errors\n", error_count);
344 return error_count ? 1 : 0;