1 /* tst_idna.c Self tests for idna_to_ascii().
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* Number of failed checks recorded so far; main prints the total and
   derives the exit status from it.  */
static int error_count = 0;

/* When non-zero, the first recorded failure terminates the program.  */
static int break_on_error = 0;

/* fail: report a self-test failure.
 *
 * FORMAT and the arguments that follow it form a printf-style message
 * which is written to stderr.  Every call increments error_count; when
 * break_on_error is set the process exits with status 1 immediately.
 */
static void
fail (const char *format, ...)
{
  va_list arg_ptr;

  va_start (arg_ptr, format);
  vfprintf (stderr, format, arg_ptr);
  /* Every va_start must be paired with va_end before returning.  */
  va_end (arg_ptr);

  error_count++;
  if (break_on_error)
    exit (1);
}
/* escapeprint: print LEN bytes of STR to stdout in an escaped, readable
 * form.
 *
 * What the visible lines show: a "(length %d bytes):" header is printed
 * first.  Then, for each index below LEN, the byte is masked with 0xFF
 * and tested against 'A'-'Z', 'a'-'z', '0'-'9', space and '.'.  One
 * printf emits the byte verbatim ("%c") and another emits it as a
 * backslash-x escape with two upper-case hex digits.
 *
 * NOTE(review): this listing carries stray line numbers and has lost
 * lines -- the return type, the braces, the declaration of i, presumably
 * an `else' between the two printf calls, and the statement guarded by
 * the final check.  Confirm against the pristine file.
 */
42 escapeprint (char *str
, int len
)
46 printf ("(length %d bytes):\n", len
);
48 for (i
= 0; i
< len
; i
++)
50 if (((str
[i
] & 0xFF) >= 'A' && (str
[i
] & 0xFF) <= 'Z') ||
51 ((str
[i
] & 0xFF) >= 'a' && (str
[i
] & 0xFF) <= 'z') ||
52 ((str
[i
] & 0xFF) >= '0' && (str
[i
] & 0xFF) <= '9')
53 || (str
[i
] & 0xFF) == ' ' || (str
[i
] & 0xFF) == '.')
54 printf ("%c", (str
[i
] & 0xFF));
56 printf ("\\x%02X", (str
[i
] & 0xFF));
/* True after each complete group of 16 bytes, except when that group
   ends exactly at LEN.  The guarded statement is not visible here.  */
57 if ((i
+1)%16 == 0 && (i
+1) < len
)
/* ucs4print: print the elements of STR to stdout as "U+nnnn " tokens.
 *
 * The loop condition uses LEN when it is non-negative (i < len); for a
 * negative LEN it instead continues while str[i] is non-zero.  Each
 * element is masked with 0xFFFF before it is printed.  A second check
 * is made after every 8th element, guarded by i + 1 < len.
 *
 * NOTE(review): "%04lu" prints the value in decimal although the "U+"
 * prefix suggests hexadecimal -- confirm the intended conversion.
 * NOTE(review): the 0xFFFF mask discards bits above the low 16, so
 * values above 0xFFFF would print truncated -- confirm this is wanted.
 * NOTE(review): this listing carries stray line numbers and has lost
 * lines (return type, braces, the declaration of i, the statement
 * guarded by the final check).  Confirm against the pristine file.
 */
63 ucs4print (unsigned long *str
, ssize_t len
)
68 for (i
= 0; len
>= 0 ? i
< len
: str
[i
]; i
++)
70 printf ("U+%04lu ", str
[i
] & 0xFFFF);
73 if ((i
+ 1) % 8 == 0 && i
+ 1 < len
)
/* Test vectors for the IDNA self tests, iterated by main.
 *
 * Members that main reads from each entry:
 *   name               label printed in the progress output
 *   inlen, in          element count and input array handed to
 *                      idna_to_ascii (in holds at most 100 elements)
 *   out                expected ASCII result; starts with
 *                      IDNA_ACE_PREFIX in the entries visible here
 *   allowunassigned,
 *   usestd3asciirules  flags forwarded to both conversion calls
 *   toasciirc          return code expected from idna_to_ascii
 *   tounicoderc        return code expected from idna_to_unicode
 *
 * NOTE(review): this listing carries stray line numbers and has lost
 * lines -- most of the struct declaration, and for several entries the
 * opening brace, the name, the length, part of the input array or the
 * trailing result codes.  Do not treat the data below as complete;
 * confirm against the pristine file.
 */
83 unsigned long in
[100];
86 int usestd3asciirules
;
92 "Arabic (Egyptian)", 17,
94 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
95 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
97 IDNA_ACE_PREFIX
"egbpdaj6bu4bxfgehfvwxn", 0, 0, IDNA_SUCCESS
,
100 "Chinese (simplified)", 9,
102 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
103 IDNA_ACE_PREFIX
"ihqwcrb4cv8a8dqg056pqjye", 0, 0, IDNA_SUCCESS
,
106 "Chinese (traditional)", 9,
108 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
109 IDNA_ACE_PREFIX
"ihqwctvzc91f659drss3x8bo0yb", 0, 0, IDNA_SUCCESS
,
112 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky"
115 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
116 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
117 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
118 IDNA_ACE_PREFIX
"Proprostnemluvesky-uyb24dma41a", 0, 0, IDNA_SUCCESS
,
123 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
124 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
125 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
126 IDNA_ACE_PREFIX
"4dbcagdahymbxekheh6e0a7fei0b", 0, 0, IDNA_SUCCESS
,
129 "Hindi (Devanagari)", 30,
131 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
132 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
133 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
134 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
135 IDNA_ACE_PREFIX
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0,
138 "Japanese (kanji and hiragana)", 18,
140 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
141 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
143 IDNA_ACE_PREFIX
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0,
146 "Korean (Hangul syllables)", 24,
148 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
149 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
150 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
151 IDNA_ACE_PREFIX
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
152 "30a5jpsd879ccm6fea98c", 0, 0, IDNA_PUNYCODE_ERROR
,
153 IDNA_PUNYCODE_ERROR
},
154 /* too long output */
156 "Russian (Cyrillic)", 28,
158 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
159 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
160 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
161 0x0441, 0x0441, 0x043A, 0x0438},
162 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
163 IDNA_SUCCESS
, IDNA_SUCCESS
},
165 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol"
168 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
169 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
170 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
171 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
172 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
173 IDNA_ACE_PREFIX
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0,
178 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
179 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
180 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
181 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
182 IDNA_ACE_PREFIX
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0,
185 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]"
188 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
189 IDNA_ACE_PREFIX
"3B-ww4c5e180e575a65lsy2b", 0, 0, IDNA_SUCCESS
,
192 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS"
195 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
196 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
197 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
198 IDNA_ACE_PREFIX
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0,
201 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]"
204 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
205 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
206 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
208 IDNA_ACE_PREFIX
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0,
211 "Japanese [HITOTSU][YANE][NO][SHITA]2"
214 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
215 IDNA_ACE_PREFIX
"2-u9tlzr9756bt3uc0v", 0, 0, IDNA_SUCCESS
,
218 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]"
221 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
222 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
223 IDNA_ACE_PREFIX
"MajiKoi5-783gue6qz075azm5e", 0, 0, IDNA_SUCCESS
,
226 "Japanese [PAFII]de[RUNBA]"
229 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
230 IDNA_ACE_PREFIX
"de-jg4avhby1noc0d", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
232 "Japanese [SONO][SUPIIDO][DE]"
235 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
236 IDNA_ACE_PREFIX
"d9juau41awczczp", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
239 {0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
240 IDNA_ACE_PREFIX
"hxargifdar", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
242 "Maltese (Malti)", 10,
243 {0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
245 IDNA_ACE_PREFIX
"bonusaa-5bb1da", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
247 "Russian (Cyrillic)", 28,
248 {0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
249 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
250 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
251 0x0441, 0x0441, 0x043a, 0x0438},
252 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
253 IDNA_SUCCESS
, IDNA_SUCCESS
},
256 "(S) -> $1.00 <-", 11,
258 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
259 0x0020, 0x003C, 0x002D},
260 IDNA_ACE_PREFIX
"-> $1.00 <--", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
262 { /* XXX depends on IDNA_ACE_PREFIX */
263 "ToASCII() with ACE prefix", 4 + 3,
264 { 'x', 'n', '-', '-', 'f', 'o', 0x3067},
265 IDNA_ACE_PREFIX
"too long too long too long too long too long too "
266 "long too long too long too long too long ", 0, 0,
267 IDNA_CONTAINS_ACE_PREFIX
, IDNA_PUNYCODE_ERROR
}
/* main: run every entry of the idna table through both conversions.
 *
 * Command line: only the last argument (argv[argc - 1]) is examined.  It
 * is compared against -v/--verbose, -b/--break-on-error and
 * -h/-?/--help; a usage line is printed, presumably by the help branch.
 *
 * Per entry, as far as the visible lines show:
 *   1. idna_to_ascii is called on in/inlen with the entry's two flags;
 *      a return code different from toasciirc is reported via fail.
 *   2. When that call returned IDNA_SUCCESS, label must match out both
 *      in length and in case-insensitive content, else fail is called.
 *   3. out is converted with stringprep_utf8_to_ucs4 and the result is
 *      handed to idna_to_unicode; a return code different from
 *      tounicoderc is reported via fail.
 *   4. On IDNA_SUCCESS the output tmp/len2 is compared with in/inlen; on
 *      any other code it is compared with ucs4label/len.  A mismatch is
 *      reported via fail.
 *
 * Prints the error total and returns 1 when error_count is non-zero,
 * 0 otherwise.
 *
 * NOTE(review): this listing carries stray line numbers and has lost
 * lines (return type, braces, several declarations, the statements
 * inside the option branches, any cleanup of ucs4label).  Confirm the
 * details against the pristine file.
 */
271 main (int argc
, char *argv
[])
274 unsigned long *ucs4label
= NULL
;
275 unsigned long tmp
[100];
/* Option handling looks at the final argument only.
   NOTE(review): argv[argc - 1] would be out of bounds for argc == 0 --
   confirm whether that case needs guarding.  */
280 if (strcmp (argv
[argc
- 1], "-v") == 0 ||
281 strcmp (argv
[argc
- 1], "--verbose") == 0)
283 else if (strcmp (argv
[argc
- 1], "-b") == 0 ||
284 strcmp (argv
[argc
- 1], "--break-on-error") == 0)
286 else if (strcmp (argv
[argc
- 1], "-h") == 0 ||
287 strcmp (argv
[argc
- 1], "-?") == 0 ||
288 strcmp (argv
[argc
- 1], "--help") == 0)
290 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
296 for (i
= 0; i
< sizeof (idna
) / sizeof (idna
[0]); i
++)
299 printf ("IDNA entry %d: %s\n", i
, idna
[i
].name
);
304 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
/* Forward direction: input array -> ASCII label.  */
307 rc
= idna_to_ascii (idna
[i
].in
, idna
[i
].inlen
, label
,
308 idna
[i
].allowunassigned
, idna
[i
].usestd3asciirules
);
309 if (rc
!= idna
[i
].toasciirc
)
311 fail ("IDNA entry %d failed: %d\n", i
, rc
);
317 if (debug
&& rc
== IDNA_SUCCESS
)
319 printf ("computed out: %s\n", label
);
320 printf ("expected out: %s\n", idna
[i
].out
);
323 printf ("returned %d expected %d\n", rc
, idna
[i
].toasciirc
);
325 if (rc
== IDNA_SUCCESS
)
327 if (strlen (idna
[i
].out
) != strlen (label
) ||
328 strcasecmp (idna
[i
].out
, label
) != 0)
330 fail ("IDNA entry %d failed\n", i
);
/* Reverse direction: the expected ASCII form is widened first, then
   converted back.  */
343 ucs4label
= stringprep_utf8_to_ucs4 (idna
[i
].out
, -1, &len
);
/* NOTE(review): "%d" is paired with the result of strlen, which has
   type size_t -- the conversion and argument types do not match.  */
347 printf ("in: %s (%d==%d)\n", idna
[i
].out
, strlen (idna
[i
].out
),
349 ucs4print (ucs4label
, len
);
353 rc
= idna_to_unicode (ucs4label
, len
, tmp
, &len2
,
354 idna
[i
].allowunassigned
,
355 idna
[i
].usestd3asciirules
);
358 printf ("expected out:\n");
359 if (rc
== IDNA_SUCCESS
)
360 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
362 ucs4print (ucs4label
, len
);
364 printf ("computed out:\n");
365 ucs4print (tmp
, len2
);
368 if (rc
!= idna
[i
].tounicoderc
)
370 fail ("IDNA entry %d failed: %d\n", i
, rc
);
/* NOTE(review): memcmp takes a byte count, but len2 and len are also
   used as element counts (compared with inlen, passed to ucs4print), so
   only the first len2 / len bytes appear to be compared rather than
   that many elements.  Confirm, and scale by sizeof (tmp[0]) if so.  */
376 if ((rc
== IDNA_SUCCESS
&& (len2
!= idna
[i
].inlen
||
377 memcmp (idna
[i
].in
, tmp
, len2
) != 0)) ||
378 (rc
!= IDNA_SUCCESS
&& (len2
!= len
||
379 memcmp (ucs4label
, tmp
, len
) != 0)))
381 fail ("IDNA entry %d failed\n", i
);
393 printf ("IDNA self tests done with %d errors\n", error_count
);
395 return error_count
? 1 : 0;