1 /* tst_idna.c Self tests for idna_to_ascii().
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* Error counter for the whole test run.  main() prints it in its final
   summary line and returns 1 when it is non-zero, 0 otherwise.  The code
   that increments it is not visible in this listing. */
25 static int error_count
= 0;
/* Presumably set by the -b / --break-on-error option that main() matches;
   neither the assignment nor any reader is visible here -- TODO confirm. */
26 static int break_on_error
= 0;
/* Report a test failure.  FORMAT is a printf-style format string; it and
   the variadic arguments that follow are formatted and written to stderr
   with vfprintf().  main() calls this whenever a return code or an output
   buffer differs from the expected value.
   NOTE(review): the return type, the va_list declaration and everything
   after the vfprintf() call are not visible in this listing; whether the
   function also updates error_count or honours break_on_error must be
   confirmed against the complete file. */
29 fail (const char *format
, ...)
33 va_start (arg_ptr
, format
);
34 vfprintf (stderr
, format
, arg_ptr
);
/* Print the LEN bytes starting at STR to stdout in a readable, escaped form.
   A "(length N bytes):" header is printed first.  Each byte is then written
   either literally or as a \xHH escape (see below).  Every byte is masked
   with 0xFF before use, so the value tested and printed is always in the
   range 0..255 regardless of whether plain char is signed. */
42 escapeprint (char *str
, int len
)
46 printf ("(length %d bytes):\n", len
);
48 for (i
= 0; i
< len
; i
++)
/* Bytes that are ASCII letters, ASCII digits, a space or a full stop are
   printed as themselves. */
50 if (((str
[i
] & 0xFF) >= 'A' && (str
[i
] & 0xFF) <= 'Z') ||
51 ((str
[i
] & 0xFF) >= 'a' && (str
[i
] & 0xFF) <= 'z') ||
52 ((str
[i
] & 0xFF) >= '0' && (str
[i
] & 0xFF) <= '9')
53 || (str
[i
] & 0xFF) == ' ' || (str
[i
] & 0xFF) == '.')
54 printf ("%c", (str
[i
] & 0xFF));
/* Any other byte is printed as a backslash, 'x' and two upper-case hex
   digits.  Presumably this is the else branch of the test above; the
   line carrying the else keyword is not visible here -- TODO confirm. */
56 printf ("\\x%02X", (str
[i
] & 0xFF));
/* True after every 16th byte, except when that byte is the last one.  The
   statement controlled by this test is not visible in this listing. */
57 if ((i
+ 1) % 16 == 0 && (i
+ 1) < len
)
/* Print a sequence of 32-bit code points from STR to stdout, one "U+NNNN "
   item per code point.  When LEN is zero or positive exactly LEN items are
   printed; when LEN is negative, printing continues until a zero code
   point is reached. */
63 ucs4print (uint32_t * str
, ssize_t len
)
68 for (i
= 0; len
>= 0 ? i
< len
: str
[i
]; i
++)
/* Only the low 16 bits of each code point are printed.
   NOTE(review): the conversion is %lu, which expects an unsigned long, but
   the argument is derived from a uint32_t; the two types differ wherever
   unsigned long is wider than 32 bits.  The value is also printed in
   decimal although it follows a "U+" prefix -- confirm whether hexadecimal
   was intended. */
70 printf ("U+%04lu ", str
[i
] & 0xFFFF);
/* True after every 8th item, except when that item is the last of LEN.  The
   statement controlled by this test is not visible in this listing. */
73 if ((i
+ 1) % 8 == 0 && i
+ 1 < len
)
/* Test vectors read by main() as idna[i].name, .inlen, .in, .out, .flags,
   .toasciirc and .tounicoderc.  Judging from the entries below, each one
   lists, in this order: a descriptive name, the number of input code
   points, the input code points, the expected ASCII output (a string
   literal appended to IDNA_ACE_PREFIX), the flags value, the return code
   expected from idna_to_ascii_4i() and the return code expected from
   idna_to_unicode_44i().  The declaration of the table itself and some
   lines of individual entries are not visible in this listing.
   NOTE(review): the names and encoded strings look like the sample strings
   of RFC 3492 section 7.1 -- TODO confirm. */
91 "Arabic (Egyptian)", 17,
93 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
94 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
96 IDNA_ACE_PREFIX
"egbpdaj6bu4bxfgehfvwxn", 0, IDNA_SUCCESS
,
99 "Chinese (simplified)", 9,
101 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
102 IDNA_ACE_PREFIX
"ihqwcrb4cv8a8dqg056pqjye", 0, IDNA_SUCCESS
,
105 "Chinese (traditional)", 9,
107 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
108 IDNA_ACE_PREFIX
"ihqwctvzc91f659drss3x8bo0yb", 0, IDNA_SUCCESS
,
111 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky", 22,
113 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
114 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
115 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
116 IDNA_ACE_PREFIX
"Proprostnemluvesky-uyb24dma41a", 0, IDNA_SUCCESS
,
121 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
122 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
123 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
124 IDNA_ACE_PREFIX
"4dbcagdahymbxekheh6e0a7fei0b", 0, IDNA_SUCCESS
,
127 "Hindi (Devanagari)", 30,
129 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
130 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
131 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
132 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
133 IDNA_ACE_PREFIX
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0,
136 "Japanese (kanji and hiragana)", 18,
138 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
139 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
141 IDNA_ACE_PREFIX
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0,
144 "Korean (Hangul syllables)", 24,
146 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
147 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
148 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
149 IDNA_ACE_PREFIX
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
150 "30a5jpsd879ccm6fea98c", 0, IDNA_PUNYCODE_ERROR
, IDNA_PUNYCODE_ERROR
},
151 /* too long output */
153 "Russian (Cyrillic)", 28,
155 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
156 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
157 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
158 0x0441, 0x0441, 0x043A, 0x0438},
159 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
160 IDNA_SUCCESS
, IDNA_SUCCESS
},
162 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol", 40,
164 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
165 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
166 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
167 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
168 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
169 IDNA_ACE_PREFIX
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0,
174 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
175 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
176 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
177 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
178 IDNA_ACE_PREFIX
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0,
181 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]", 8,
183 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
184 IDNA_ACE_PREFIX
"3B-ww4c5e180e575a65lsy2b", 0, IDNA_SUCCESS
,
187 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS", 24,
189 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
190 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
191 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
192 IDNA_ACE_PREFIX
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0,
195 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]", 25,
197 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
198 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
199 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
201 IDNA_ACE_PREFIX
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0,
204 "Japanese [HITOTSU][YANE][NO][SHITA]2", 8,
206 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
207 IDNA_ACE_PREFIX
"2-u9tlzr9756bt3uc0v", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
209 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]", 13,
211 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
212 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
213 IDNA_ACE_PREFIX
"MajiKoi5-783gue6qz075azm5e", 0, IDNA_SUCCESS
,
216 "Japanese [PAFII]de[RUNBA]", 9,
218 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
219 IDNA_ACE_PREFIX
"de-jg4avhby1noc0d", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
221 "Japanese [SONO][SUPIIDO][DE]", 7,
223 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
224 IDNA_ACE_PREFIX
"d9juau41awczczp", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
228 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
229 IDNA_ACE_PREFIX
"hxargifdar", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
231 "Maltese (Malti)", 10,
233 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
235 IDNA_ACE_PREFIX
"bonusaa-5bb1da", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
/* NOTE(review): this entry repeats the earlier "Russian (Cyrillic)" entry;
   name, length, code points, expected output and return codes are the
   same, only the hex digits are written in lower case here. */
237 "Russian (Cyrillic)", 28,
239 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
240 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
241 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
242 0x0441, 0x0441, 0x043a, 0x0438},
243 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
244 IDNA_SUCCESS
, IDNA_SUCCESS
},
/* All eleven input code points of this entry are below 0x80. */
247 "(S) -> $1.00 <-", 11,
249 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
250 0x0020, 0x003C, 0x002D},
251 IDNA_ACE_PREFIX
"-> $1.00 <--", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
/* Negative test: the input itself begins with the characters "xn--", and
   the return code expected from the to-ASCII step is
   IDNA_CONTAINS_ACE_PREFIX.  The input spells the prefix out as literal
   characters and its length as 4 + 3, so it presumably has to be kept in
   step with the value of IDNA_ACE_PREFIX, which is defined elsewhere. */
253 { /* XXX depends on IDNA_ACE_PREFIX */
254 "ToASCII() with ACE prefix", 4 + 3,
256 'x', 'n', '-', '-', 'f', 'o', 0x3067},
257 IDNA_ACE_PREFIX
"too long too long too long too long too long too "
258 "long too long too long too long too long ", 0,
259 IDNA_CONTAINS_ACE_PREFIX
, IDNA_PUNYCODE_ERROR
}
/* Test driver.  For every entry of the idna[] table it converts the input
   with idna_to_ascii_4i() and checks the return code and the produced
   label, then converts the expected ASCII output back with
   idna_to_unicode_44i() and checks that return code and result as well.
   Mismatches are reported through fail().  Returns 1 if error_count is
   non-zero at the end, 0 otherwise.
   NOTE(review): many lines of this function (declarations, braces, else
   branches and some statement bodies) are not visible in this listing, so
   the comments below describe only the statements that are shown. */
263 main (int argc
, char *argv
[])
266 uint32_t *ucs4label
= NULL
;
/* Command-line handling: argv[argc - 1] is matched against the verbose,
   break-on-error and help options.  The actions taken for each match, and
   any loop around these tests, are not visible here. */
272 if (strcmp (argv
[argc
- 1], "-v") == 0 ||
273 strcmp (argv
[argc
- 1], "--verbose") == 0)
275 else if (strcmp (argv
[argc
- 1], "-b") == 0 ||
276 strcmp (argv
[argc
- 1], "--break-on-error") == 0)
278 else if (strcmp (argv
[argc
- 1], "-h") == 0 ||
279 strcmp (argv
[argc
- 1], "-?") == 0 ||
280 strcmp (argv
[argc
- 1], "--help") == 0)
282 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
/* Visit every entry of the table; the element count is derived from the
   size of the array itself. */
288 for (i
= 0; i
< sizeof (idna
) / sizeof (idna
[0]); i
++)
291 printf ("IDNA entry %d: %s\n", i
, idna
[i
].name
);
296 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
/* Forward direction: convert the code points to an ASCII label and compare
   the return code with the one recorded in the table. */
299 rc
= idna_to_ascii_4i (idna
[i
].in
, idna
[i
].inlen
, label
, idna
[i
].flags
);
300 if (rc
!= idna
[i
].toasciirc
)
302 fail ("IDNA entry %d failed: %d\n", i
, rc
);
308 if (debug
&& rc
== IDNA_SUCCESS
)
310 printf ("computed out: %s\n", label
);
311 printf ("expected out: %s\n", idna
[i
].out
);
314 printf ("returned %d expected %d\n", rc
, idna
[i
].toasciirc
);
/* On success the computed label must have the same length as the expected
   output and match it when compared without regard to letter case. */
316 if (rc
== IDNA_SUCCESS
)
318 if (strlen (idna
[i
].out
) != strlen (label
) ||
319 strcasecmp (idna
[i
].out
, label
) != 0)
321 fail ("IDNA entry %d failed\n", i
);
/* Reverse direction: the expected ASCII output is first converted to an
   array of 32-bit code points, which becomes the input of the to-Unicode
   step.  Handling of a failed conversion is not visible here. */
334 ucs4label
= stringprep_utf8_to_ucs4 (idna
[i
].out
, -1, &len
);
/* NOTE(review): strlen() returns a size_t but is printed with %d here. */
338 printf ("in: %s (%d==%d)\n", idna
[i
].out
, strlen (idna
[i
].out
),
340 ucs4print (ucs4label
, len
);
/* len2 starts out as the capacity of tmp in elements and is passed by
   address to the conversion call below. */
343 len2
= sizeof (tmp
) / sizeof (tmp
[0]);
344 rc
= idna_to_unicode_44i (ucs4label
, len
, tmp
, &len2
, idna
[i
].flags
);
347 printf ("expected out (%d):\n",
348 rc
== IDNA_SUCCESS
? idna
[i
].inlen
: len
);
349 if (rc
== IDNA_SUCCESS
)
350 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
352 ucs4print (ucs4label
, len
);
354 printf ("computed out (%d):\n", len2
);
355 ucs4print (tmp
, len2
);
358 if (rc
!= idna
[i
].tounicoderc
)
360 fail ("IDNA entry %d failed: %d\n", i
, rc
);
/* Result check: on success tmp is compared with the original input of the
   entry, otherwise with the code points that were passed in.
   NOTE(review): len2 and len are element counts (they are compared with
   idna[i].inlen and handed to ucs4print() as lengths), but memcmp() takes a
   byte count.  If the elements are wider than one byte, as the uint32_t
   parameter of ucs4print() suggests, only a leading part of each array is
   compared -- confirm and scale by the element size if so. */
366 if ((rc
== IDNA_SUCCESS
&& (len2
!= idna
[i
].inlen
||
367 memcmp (idna
[i
].in
, tmp
, len2
) != 0)) ||
368 (rc
!= IDNA_SUCCESS
&& (len2
!= len
||
369 memcmp (ucs4label
, tmp
, len
) != 0)))
/* NOTE(review): in the success case the value printed after "len=" is len2
   and the value printed after "len2=" is idna[i].inlen, so the labels do
   not match the arguments. */
373 if (rc
== IDNA_SUCCESS
)
374 printf ("len=%d len2=%d\n", len2
, idna
[i
].inlen
);
376 printf ("len=%d len2=%d\n", len
, len2
);
378 fail ("IDNA entry %d failed\n", i
);
/* Final summary and exit status: 1 if any error was counted, else 0. */
390 printf ("IDNA self tests done with %d errors\n", error_count
);
392 return error_count
? 1 : 0;