Add lgpl and gpl from gnulib.
[libidn.git] / tests / tst_idna.c
blobaf5832e139c7714c8fdfd40938de310825ef764a
/* tst_idna.c --- Self tests for idna_to_ascii().
 * Copyright (C) 2002, 2003, 2004  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */
22 #ifdef HAVE_CONFIG_H
23 # include "config.h"
24 #endif
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <stdarg.h>
29 #include <string.h>
31 #include <stringprep.h>
32 #include <idna.h>
34 #include "utils.h"
/* One IDNA self-test vector: a Unicode label, its expected ASCII
   form, and the return codes both conversion directions must give. */
struct idna
{
  const char *name;		/* Human-readable description, printed in debug output. */
  size_t inlen;			/* Number of code points used in `in'. */
  uint32_t in[100];		/* Input label as UCS-4 code points. */
  const char *out;		/* Expected ASCII output; also the input for the
				   reverse (to-Unicode) conversion. */
  int flags;			/* Flags passed unchanged to both conversions. */
  int toasciirc;		/* Expected return code of idna_to_ascii_4i(). */
  int tounicoderc;		/* Expected return code of idna_to_unicode_44i(). */
};
47 static const struct idna idna[] = {
49 "Arabic (Egyptian)", 17,
51 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
52 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
53 0x061F},
54 IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn", 0, IDNA_SUCCESS, IDNA_SUCCESS},
56 "Chinese (simplified)", 9,
58 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
59 IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye", 0, IDNA_SUCCESS, IDNA_SUCCESS},
61 "Chinese (traditional)", 9,
63 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
64 IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb", 0, IDNA_SUCCESS,
65 IDNA_SUCCESS},
67 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky", 22,
69 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
70 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
71 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
72 IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a", 0, IDNA_SUCCESS,
73 IDNA_SUCCESS},
75 "Hebrew", 22,
77 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
78 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
79 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
80 IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b", 0, IDNA_SUCCESS,
81 IDNA_SUCCESS},
83 "Hindi (Devanagari)", 30,
85 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
86 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
87 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
88 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
89 IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0,
90 IDNA_SUCCESS},
92 "Japanese (kanji and hiragana)", 18,
94 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
95 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
96 0x306E, 0x304B},
97 IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, IDNA_SUCCESS},
99 "Korean (Hangul syllables)", 24,
101 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
102 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
103 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
104 IDNA_ACE_PREFIX "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
105 "30a5jpsd879ccm6fea98c", 0, IDNA_PUNYCODE_ERROR, IDNA_PUNYCODE_ERROR},
106 /* too long output */
108 "Russian (Cyrillic)", 28,
110 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
111 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
112 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
113 0x0441, 0x0441, 0x043A, 0x0438},
114 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
115 IDNA_SUCCESS, IDNA_SUCCESS},
117 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol", 40,
119 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
120 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
121 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
122 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
123 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
124 IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a", 0,
125 IDNA_SUCCESS},
127 "Vietnamese", 31,
129 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
130 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
131 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
132 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
133 IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0,
134 IDNA_SUCCESS},
136 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]", 8,
138 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
139 IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b", 0, IDNA_SUCCESS,
140 IDNA_SUCCESS},
142 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS", 24,
144 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
145 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
146 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
147 IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0,
148 IDNA_SUCCESS},
150 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]", 25,
152 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
153 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
154 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
155 0x6240},
156 IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0,
157 IDNA_SUCCESS},
159 "Japanese [HITOTSU][YANE][NO][SHITA]2", 8,
161 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
162 IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v", 0, IDNA_SUCCESS, IDNA_SUCCESS},
164 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]", 13,
166 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
167 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
168 IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e", 0, IDNA_SUCCESS,
169 IDNA_SUCCESS},
171 "Japanese [PAFII]de[RUNBA]", 9,
173 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
174 IDNA_ACE_PREFIX "de-jg4avhby1noc0d", 0, IDNA_SUCCESS, IDNA_SUCCESS},
176 "Japanese [SONO][SUPIIDO][DE]", 7,
178 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
179 IDNA_ACE_PREFIX "d9juau41awczczp", 0, IDNA_SUCCESS, IDNA_SUCCESS},
181 "Greek", 8,
183 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
184 IDNA_ACE_PREFIX "hxargifdar", 0, IDNA_SUCCESS, IDNA_SUCCESS},
186 "Maltese (Malti)", 10,
188 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
189 0x0127, 0x0061},
190 IDNA_ACE_PREFIX "bonusaa-5bb1da", 0, IDNA_SUCCESS, IDNA_SUCCESS},
192 "Russian (Cyrillic)", 28,
194 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
195 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
196 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
197 0x0441, 0x0441, 0x043a, 0x0438},
198 IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
199 IDNA_SUCCESS, IDNA_SUCCESS},
200 #if 0
202 "(S) -> $1.00 <-", 11,
204 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
205 0x0020, 0x003C, 0x002D},
206 IDNA_ACE_PREFIX "-> $1.00 <--", 0, IDNA_SUCCESS, IDNA_SUCCESS},
207 #endif
208 { /* XXX depends on IDNA_ACE_PREFIX */
209 "ToASCII() with ACE prefix", 4 + 3,
211 'x', 'n', '-', '-', 'f', 'o', 0x3067},
212 IDNA_ACE_PREFIX "too long too long too long too long too long too "
213 "long too long too long too long too long ", 0,
214 IDNA_CONTAINS_ACE_PREFIX, IDNA_PUNYCODE_ERROR}
217 void
218 doit (void)
220 char label[100];
221 uint32_t *ucs4label = NULL;
222 uint32_t tmp[100];
223 size_t len, len2, i;
224 int rc;
226 for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++)
228 if (debug)
229 printf ("IDNA entry %d: %s\n", i, idna[i].name);
231 if (debug)
233 printf ("in:\n");
234 ucs4print (idna[i].in, idna[i].inlen);
237 rc = idna_to_ascii_4i (idna[i].in, idna[i].inlen, label, idna[i].flags);
238 if (rc != idna[i].toasciirc)
240 fail ("IDNA entry %d failed: %d\n", i, rc);
241 if (debug)
242 printf ("FATAL\n");
243 continue;
246 if (debug && rc == IDNA_SUCCESS)
248 printf ("computed out: %s\n", label);
249 printf ("expected out: %s\n", idna[i].out);
251 else if (debug)
252 printf ("returned %d expected %d\n", rc, idna[i].toasciirc);
254 if (rc == IDNA_SUCCESS)
256 if (strlen (idna[i].out) != strlen (label) ||
257 strcasecmp (idna[i].out, label) != 0)
259 fail ("IDNA entry %d failed\n", i);
260 if (debug)
261 printf ("ERROR\n");
263 else if (debug)
264 printf ("OK\n");
266 else if (debug)
267 printf ("OK\n");
269 if (ucs4label)
270 free (ucs4label);
272 ucs4label = stringprep_utf8_to_ucs4 (idna[i].out, -1, &len);
274 if (debug)
276 printf ("in: %s (%d==%d)\n", idna[i].out, strlen (idna[i].out),
277 len);
278 ucs4print (ucs4label, len);
281 len2 = sizeof (tmp) / sizeof (tmp[0]);
282 rc = idna_to_unicode_44i (ucs4label, len, tmp, &len2, idna[i].flags);
283 if (debug)
285 printf ("expected out (%d):\n",
286 rc == IDNA_SUCCESS ? idna[i].inlen : len);
287 if (rc == IDNA_SUCCESS)
288 ucs4print (idna[i].in, idna[i].inlen);
289 else
290 ucs4print (ucs4label, len);
292 printf ("computed out (%d):\n", len2);
293 ucs4print (tmp, len2);
296 if (rc != idna[i].tounicoderc)
298 fail ("IDNA entry %d failed: %d\n", i, rc);
299 if (debug)
300 printf ("FATAL\n");
301 continue;
304 if ((rc == IDNA_SUCCESS && (len2 != idna[i].inlen ||
305 memcmp (idna[i].in, tmp, len2) != 0)) ||
306 (rc != IDNA_SUCCESS && (len2 != len ||
307 memcmp (ucs4label, tmp, len) != 0)))
309 if (debug)
311 if (rc == IDNA_SUCCESS)
312 printf ("len=%d len2=%d\n", len2, idna[i].inlen);
313 else
314 printf ("len=%d len2=%d\n", len, len2);
316 fail ("IDNA entry %d failed\n", i);
317 if (debug)
318 printf ("ERROR\n");
320 else if (debug)
321 printf ("OK\n\n");
324 if (ucs4label)
325 free (ucs4label);