/* tst_idna.c	Self tests for idna_to_ascii() and idna_to_unicode().
 * Copyright (C) 2002, 2003  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
/* Number of failed checks recorded so far.  main() prints it in the
   final summary and derives the process exit status from it.  */
static int error_count = 0;

/* When non-zero, the first recorded failure terminates the run.
   Presumably set by the -b / --break-on-error option that main()
   recognizes -- confirm against the option-parsing code.  */
static int break_on_error = 0;

/* Report a failed check.

   FORMAT and the variadic arguments follow printf() conventions; the
   formatted message is written to stderr.  Each call counts as one
   error.  If break_on_error is set the process exits with status 1
   right after the message is printed.

   NOTE(review): the statements that bump error_count and honor
   break_on_error were not present in the text this block was rebuilt
   from.  They are restored here because nothing else in the file
   increments the counter that main() reports -- confirm against the
   upstream file.  */
static void
fail (const char *format, ...)
{
  va_list arg_ptr;

  va_start (arg_ptr, format);
  vfprintf (stderr, format, arg_ptr);
  /* Every va_start must be paired with va_end before returning.  */
  va_end (arg_ptr);

  error_count++;
  if (break_on_error)
    exit (1);
}
/* Print the first LEN bytes of STR to stdout in an ASCII-safe form.

   A header line "(length LEN bytes):" is printed first.  Bytes that are
   ASCII letters, digits, a space or a full stop are printed verbatim;
   every other byte is printed as a \xHH escape with two upper-case hex
   digits.  After every 16th byte, except the last one, the output is
   wrapped.

   NOTE(review): the text emitted at a wrap point and after the final
   byte was not visible in the text this block was rebuilt from; a bare
   newline is used for both -- confirm against the upstream file.  */
static void
escapeprint (char *str, int len)
{
  int i;

  printf ("(length %d bytes):\n", len);

  for (i = 0; i < len; i++)
    {
      /* Mask to 0..255 so bytes >= 0x80 are not sign-extended on
         platforms where plain char is signed.  */
      int byte = str[i] & 0xFF;
      int verbatim = (byte >= 'A' && byte <= 'Z')
        || (byte >= 'a' && byte <= 'z')
        || (byte >= '0' && byte <= '9')
        || byte == ' '
        || byte == '.';

      if (verbatim)
        printf ("%c", byte);
      else
        printf ("\\x%02X", (unsigned int) byte);

      /* The "(i + 1) < len" guard keeps the wrap from doubling up with
         the terminating newline printed after the loop.  */
      if ((i + 1) % 16 == 0 && (i + 1) < len)
        printf ("\n");
    }

  printf ("\n");
}
/* Print a sequence of code points to stdout as "U+XXXX " items.

   STR points to the code points.  When LEN is non-negative exactly LEN
   elements are printed; when LEN is negative, printing stops at the
   first zero element.  Each value is masked to its low 16 bits before
   printing, as in the original statement.  After every 8th item,
   except the last one, the output is wrapped; because that test
   compares against LEN, no wrapping happens in the zero-terminated
   mode.

   The values are printed in hexadecimal: the "U+" notation denotes hex
   code points, and main() prints the same data with %04lx.  The
   previous %04lu conversion emitted decimal digits behind a "U+"
   prefix.

   NOTE(review): the text emitted at a wrap point and after the final
   item was not visible in the text this block was rebuilt from; a bare
   newline is used for both -- confirm against the upstream file.  */
static void
ucs4print (unsigned long *str, ssize_t len)
{
  ssize_t i;

  for (i = 0; len >= 0 ? i < len : str[i] != 0; i++)
    {
      printf ("U+%04lx ", str[i] & 0xFFFF);

      if ((i + 1) % 8 == 0 && i + 1 < len)
        printf ("\n");
    }

  printf ("\n");
}
/* Test-vector record fields and the idna[] table of vectors.
   NOTE(review): the struct header and a number of fields and rows are
   not visible in this view; the comments below are limited to what the
   visible lines and main()'s accesses show.  */
/* in: the input code points of a vector, at most 100 elements.  */
83 unsigned long in
[100];
/* usestd3asciirules: flag that main() forwards to idna_to_ascii() and
   idna_to_unicode().  */
86 int usestd3asciirules
;
/* Each row, as far as visible here: a name string, the input length,
   the input code points, the expected ToASCII output (IDNA_ACE_PREFIX
   followed by an adjacent string literal), two zero-valued flags and
   the expected return codes.  main() reads these as .name, .inlen,
   .in, .out, .allowunassigned, .usestd3asciirules, .toasciirc and
   .tounicoderc -- presumably in that order; confirm against the struct
   declaration.  */
92 "Arabic (Egyptian)", 17,
94 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
95 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
97 IDNA_ACE_PREFIX
"egbpdaj6bu4bxfgehfvwxn", 0, 0, IDNA_SUCCESS
,
100 "Chinese (simplified)", 9,
102 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
103 IDNA_ACE_PREFIX
"ihqwcrb4cv8a8dqg056pqjye", 0, 0, IDNA_SUCCESS
,
106 "Chinese (traditional)", 9,
108 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
109 IDNA_ACE_PREFIX
"ihqwctvzc91f659drss3x8bo0yb", 0, 0, IDNA_SUCCESS
,
114 ": Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky"
118 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
119 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
120 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
121 IDNA_ACE_PREFIX
"Proprostnemluvesky-uyb24dma41a", 0, 0, IDNA_SUCCESS
,
126 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
127 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
128 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
129 IDNA_ACE_PREFIX
"4dbcagdahymbxekheh6e0a7fei0b", 0, 0, IDNA_SUCCESS
,
132 "Hindi (Devanagari)", 30,
134 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
135 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
136 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
137 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
138 IDNA_ACE_PREFIX
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0,
141 "Japanese (kanji and hiragana)", 18,
143 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
144 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
146 IDNA_ACE_PREFIX
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0,
/* The Korean vector expects IDNA_PUNYCODE_ERROR from both directions;
   the comment following the row gives the reason.  */
150 "Korean (Hangul syllables)", 24,
152 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
153 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
154 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
155 IDNA_ACE_PREFIX
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
156 "30a5jpsd879ccm6fea98c", 0, 0, IDNA_PUNYCODE_ERROR
,
157 IDNA_PUNYCODE_ERROR
},
158 /* too long output */
161 "Russian (Cyrillic)", 28,
163 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
164 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
165 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
166 0x0441, 0x0441, 0x043A, 0x0438},
167 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
168 IDNA_SUCCESS
, IDNA_SUCCESS
},
172 ": Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol"
176 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
177 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
178 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
179 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
180 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
181 IDNA_ACE_PREFIX
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0,
186 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
187 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
188 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
189 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
190 IDNA_ACE_PREFIX
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0,
195 "3[NEN]B[GUMI][KINPACHI][SENSEI]"
199 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
200 IDNA_ACE_PREFIX
"3B-ww4c5e180e575a65lsy2b", 0, 0, IDNA_SUCCESS
,
205 "[AMURO][NAMIE]-with-SUPER-MONKEYS"
209 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
210 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
211 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
212 IDNA_ACE_PREFIX
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0,
217 "Hello-Another-Way-[SOREZORE][NO][BASHO]"
221 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
222 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
223 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
225 IDNA_ACE_PREFIX
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0,
230 "[HITOTSU][YANE][NO][SHITA]2"
234 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
235 IDNA_ACE_PREFIX
"2-u9tlzr9756bt3uc0v", 0, 0, IDNA_SUCCESS
,
240 "Maji[DE]Koi[SURU]5[BYOU][MAE]"
244 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
245 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
246 IDNA_ACE_PREFIX
"MajiKoi5-783gue6qz075azm5e", 0, 0, IDNA_SUCCESS
,
255 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
256 IDNA_ACE_PREFIX
"de-jg4avhby1noc0d", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
260 "[SONO][SUPIIDO][DE]"
264 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
265 IDNA_ACE_PREFIX
"d9juau41awczczp", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
268 {0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
269 IDNA_ACE_PREFIX
"hxargifdar", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
271 "Maltese (Malti)", 10,
272 {0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
274 IDNA_ACE_PREFIX
"bonusaa-5bb1da", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
276 "Russian (Cyrillic)", 28,
277 {0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
278 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
279 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
280 0x0441, 0x0441, 0x043a, 0x0438},
281 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
282 IDNA_SUCCESS
, IDNA_SUCCESS
},
285 "(S) -> $1.00 <-", 11,
287 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
288 0x0020, 0x003C, 0x002D},
289 IDNA_ACE_PREFIX
"-> $1.00 <--", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
/* Input that already begins with the characters 'x', 'n', '-', '-'.
   The length is written as 4 + 3: those four characters plus the three
   elements that follow them.  The expected ToASCII result is
   IDNA_CONTAINS_ACE_PREFIX.  */
292 { /* XXX depends on IDNA_ACE_PREFIX */
293 "ToASCII() with ACE prefix", 4 + 3,
294 { 'x', 'n', '-', '-', 'f', 'o', 0x3067},
295 IDNA_ACE_PREFIX
"too long too long too long too long too long too "
296 "long too long too long too long too long ", 0, 0,
297 IDNA_CONTAINS_ACE_PREFIX
, IDNA_PUNYCODE_ERROR
}
/* Test driver.

   Compares argv[argc - 1] against the -v/--verbose, -b/--break-on-error
   and -h/-?/--help spellings, then walks every entry of idna[]: it runs
   idna_to_ascii() on the entry's input code points and
   idna_to_unicode() on the entry's expected output (converted to UCS-4
   first), calling fail() whenever a return code or a result differs
   from what the table expects.

   Returns 1 when error_count is non-zero, 0 otherwise.

   NOTE(review): many lines of this function (declarations, braces and
   branch bodies) are not visible in this view; the comments below
   describe only the statements that are.  */
302 main (int argc
, char *argv
[])
305 unsigned long *ucs4label
= NULL
;
306 unsigned long tmp
[100];
/* NOTE(review): argv[argc - 1] is argv[0], the program name, when the
   program is started without arguments.  */
311 if (strcmp (argv
[argc
- 1], "-v") == 0 ||
312 strcmp (argv
[argc
- 1], "--verbose") == 0)
314 else if (strcmp (argv
[argc
- 1], "-b") == 0 ||
315 strcmp (argv
[argc
- 1], "--break-on-error") == 0)
317 else if (strcmp (argv
[argc
- 1], "-h") == 0 ||
318 strcmp (argv
[argc
- 1], "-?") == 0 ||
319 strcmp (argv
[argc
- 1], "--help") == 0)
321 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
/* One iteration per test vector in idna[].  */
327 for (i
= 0; i
< sizeof (idna
) / sizeof (idna
[0]); i
++)
/* XML-style markup output; the section title is the entry's name.  */
330 printf("<section title=\"%s.\">\n", idna
[i
].name
);
332 printf("<figure>\n");
333 printf("<artwork>\n");
/* p receives the result of stringprep_ucs4_to_utf8() for the entry's
   input; t is filled in through &t and printed below as the input
   length.  */
337 p
= stringprep_ucs4_to_utf8(idna
[i
].in
, idna
[i
].inlen
, NULL
, &t
);
339 escapeprint(p
, strlen(p
));
/* NOTE(review): %d requires an int argument; t's declaration is not
   visible here -- confirm its type matches.  */
343 printf ("input (length %d):\n\t", t
);
344 for (j
= 0; j
< idna
[i
].inlen
; j
++)
346 printf ("U+%04lx ", idna
[i
].in
[j
]);
355 printf ("IDNA entry %d: %s\n", i
, idna
[i
].name
);
360 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
/* ToASCII step: encode the entry's code points into label.  */
363 rc
= idna_to_ascii (idna
[i
].in
, idna
[i
].inlen
, label
,
364 idna
[i
].allowunassigned
, idna
[i
].usestd3asciirules
);
/* A return code other than the expected toasciirc is a failure.  */
365 if (rc
!= idna
[i
].toasciirc
)
367 fail ("IDNA entry %d failed: %d\n", i
, rc
);
374 if (rc
== IDNA_SUCCESS
)
375 printf ("\nout: %s\n", label
);
378 if (debug
&& rc
== IDNA_SUCCESS
)
380 printf ("computed out: %s\n", label
);
381 printf ("expected out: %s\n", idna
[i
].out
);
384 printf ("returned %d expected %d\n", rc
, idna
[i
].toasciirc
);
386 if (rc
== IDNA_SUCCESS
)
/* The expected and computed labels must have the same length and match
   case-insensitively.  */
388 if (strlen (idna
[i
].out
) != strlen (label
) ||
389 strcasecmp (idna
[i
].out
, label
) != 0)
391 fail ("IDNA entry %d failed\n", i
);
/* ToUnicode step: start from the expected ASCII output, converted to
   UCS-4; len receives the length used for ucs4label below.  */
404 ucs4label
= stringprep_utf8_to_ucs4 (idna
[i
].out
, -1, &len
);
/* NOTE(review): strlen() returns size_t but is printed with %d here, a
   mismatched conversion (undefined behavior per the C standard).  A
   matching conversion such as %zu, or a cast to the printed type, would
   fix it.  */
408 printf ("in: %s (%d==%d)\n", idna
[i
].out
, strlen (idna
[i
].out
),
410 ucs4print (ucs4label
, len
);
414 rc
= idna_to_unicode (ucs4label
, len
, tmp
, &len2
,
415 idna
[i
].allowunassigned
,
416 idna
[i
].usestd3asciirules
);
419 printf ("expected out:\n");
420 if (rc
== IDNA_SUCCESS
)
421 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
423 ucs4print (ucs4label
, len
);
425 printf ("computed out:\n");
426 ucs4print (tmp
, len2
);
/* A return code other than the expected tounicoderc is a failure.  */
429 if (rc
!= idna
[i
].tounicoderc
)
431 fail ("IDNA entry %d failed: %d\n", i
, rc
);
/* On success the decoded result in tmp must equal the entry's original
   input; otherwise tmp must equal ucs4label.
   NOTE(review): memcmp() counts bytes, but len2 and len are used as
   element counts (len2 is compared with inlen, len is the ucs4label
   length) while in, tmp and ucs4label hold unsigned long elements.  As
   written only the first len2 (or len) bytes are compared; multiplying
   the count by sizeof (tmp[0]) would compare whole elements.  */
437 if ((rc
== IDNA_SUCCESS
&& (len2
!= idna
[i
].inlen
||
438 memcmp (idna
[i
].in
, tmp
, len2
) != 0)) ||
439 (rc
!= IDNA_SUCCESS
&& (len2
!= len
||
440 memcmp (ucs4label
, tmp
, len
) != 0)))
442 fail ("IDNA entry %d failed\n", i
);
450 printf("</artwork>\n");
451 printf("</figure>\n");
453 printf("</section>\n");
/* Summary line, then an exit status of 1 if any failure was counted.  */
461 printf ("IDNA self tests done with %d errors\n", error_count
);
463 return error_count
? 1 : 0;