/* tst_idna.c	Self tests for idna_to_ascii().
 * Copyright (C) 2002, 2003  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* Running error total: printed in the final "IDNA self tests done with
   %d errors" summary and used to choose the process exit status
   (non-zero total => exit status 1). */
static int error_count = 0;

/* Break-on-error flag, off by default.
   NOTE(review): presumably switched on by the -b / --break-on-error
   command-line option and consulted by the failure reporter; neither
   the assignment nor the consumer is visible in this excerpt -- confirm. */
static int break_on_error = 0;
29 fail (const char *format
, ...)
33 va_start (arg_ptr
, format
);
34 vfprintf (stderr
, format
, arg_ptr
);
42 escapeprint (char *str
, int len
)
46 printf ("(length %d bytes):\n", len
);
48 for (i
= 0; i
< len
; i
++)
50 if (((str
[i
] & 0xFF) >= 'A' && (str
[i
] & 0xFF) <= 'Z') ||
51 ((str
[i
] & 0xFF) >= 'a' && (str
[i
] & 0xFF) <= 'z') ||
52 ((str
[i
] & 0xFF) >= '0' && (str
[i
] & 0xFF) <= '9')
53 || (str
[i
] & 0xFF) == ' ' || (str
[i
] & 0xFF) == '.')
54 printf ("%c", (str
[i
] & 0xFF));
56 printf ("\\x%02X", (str
[i
] & 0xFF));
57 if ((i
+ 1) % 16 == 0 && (i
+ 1) < len
)
63 ucs4print (unsigned long *str
, ssize_t len
)
68 for (i
= 0; len
>= 0 ? i
< len
: str
[i
]; i
++)
70 printf ("U+%04lu ", str
[i
] & 0xFFFF);
73 if ((i
+ 1) % 8 == 0 && i
+ 1 < len
)
83 unsigned long in
[100];
86 int usestd3asciirules
;
92 "Arabic (Egyptian)", 17,
94 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
95 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
97 IDNA_ACE_PREFIX
"egbpdaj6bu4bxfgehfvwxn", 0, 0, IDNA_SUCCESS
,
100 "Chinese (simplified)", 9,
102 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
103 IDNA_ACE_PREFIX
"ihqwcrb4cv8a8dqg056pqjye", 0, 0, IDNA_SUCCESS
,
106 "Chinese (traditional)", 9,
108 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
109 IDNA_ACE_PREFIX
"ihqwctvzc91f659drss3x8bo0yb", 0, 0, IDNA_SUCCESS
,
112 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky", 22,
114 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
115 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
116 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
117 IDNA_ACE_PREFIX
"Proprostnemluvesky-uyb24dma41a", 0, 0, IDNA_SUCCESS
,
122 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
123 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
124 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
125 IDNA_ACE_PREFIX
"4dbcagdahymbxekheh6e0a7fei0b", 0, 0, IDNA_SUCCESS
,
128 "Hindi (Devanagari)", 30,
130 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
131 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
132 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
133 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
134 IDNA_ACE_PREFIX
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0,
137 "Japanese (kanji and hiragana)", 18,
139 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
140 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
142 IDNA_ACE_PREFIX
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0,
145 "Korean (Hangul syllables)", 24,
147 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
148 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
149 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
150 IDNA_ACE_PREFIX
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
151 "30a5jpsd879ccm6fea98c", 0, 0, IDNA_PUNYCODE_ERROR
,
152 IDNA_PUNYCODE_ERROR
},
153 /* too long output */
155 "Russian (Cyrillic)", 28,
157 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
158 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
159 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
160 0x0441, 0x0441, 0x043A, 0x0438},
161 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
162 IDNA_SUCCESS
, IDNA_SUCCESS
},
164 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol", 40,
166 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
167 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
168 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
169 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
170 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
171 IDNA_ACE_PREFIX
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0,
176 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
177 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
178 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
179 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
180 IDNA_ACE_PREFIX
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0,
183 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]", 8,
185 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
186 IDNA_ACE_PREFIX
"3B-ww4c5e180e575a65lsy2b", 0, 0, IDNA_SUCCESS
,
189 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS", 24,
191 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
192 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
193 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
194 IDNA_ACE_PREFIX
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0,
197 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]", 25,
199 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
200 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
201 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
203 IDNA_ACE_PREFIX
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0,
206 "Japanese [HITOTSU][YANE][NO][SHITA]2", 8,
208 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
209 IDNA_ACE_PREFIX
"2-u9tlzr9756bt3uc0v", 0, 0, IDNA_SUCCESS
,
212 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]", 13,
214 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
215 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
216 IDNA_ACE_PREFIX
"MajiKoi5-783gue6qz075azm5e", 0, 0, IDNA_SUCCESS
,
219 "Japanese [PAFII]de[RUNBA]", 9,
221 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
222 IDNA_ACE_PREFIX
"de-jg4avhby1noc0d", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
224 "Japanese [SONO][SUPIIDO][DE]", 7,
226 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
227 IDNA_ACE_PREFIX
"d9juau41awczczp", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
231 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
232 IDNA_ACE_PREFIX
"hxargifdar", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
234 "Maltese (Malti)", 10,
236 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
238 IDNA_ACE_PREFIX
"bonusaa-5bb1da", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
240 "Russian (Cyrillic)", 28,
242 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
243 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
244 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
245 0x0441, 0x0441, 0x043a, 0x0438},
246 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0,
247 IDNA_SUCCESS
, IDNA_SUCCESS
},
250 "(S) -> $1.00 <-", 11,
252 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
253 0x0020, 0x003C, 0x002D},
254 IDNA_ACE_PREFIX
"-> $1.00 <--", 0, 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
256 { /* XXX depends on IDNA_ACE_PREFIX */
257 "ToASCII() with ACE prefix", 4 + 3,
259 'x', 'n', '-', '-', 'f', 'o', 0x3067},
260 IDNA_ACE_PREFIX
"too long too long too long too long too long too "
261 "long too long too long too long too long ", 0, 0,
262 IDNA_CONTAINS_ACE_PREFIX
, IDNA_PUNYCODE_ERROR
}
266 main (int argc
, char *argv
[])
269 unsigned long *ucs4label
= NULL
;
270 unsigned long tmp
[100];
275 if (strcmp (argv
[argc
- 1], "-v") == 0 ||
276 strcmp (argv
[argc
- 1], "--verbose") == 0)
278 else if (strcmp (argv
[argc
- 1], "-b") == 0 ||
279 strcmp (argv
[argc
- 1], "--break-on-error") == 0)
281 else if (strcmp (argv
[argc
- 1], "-h") == 0 ||
282 strcmp (argv
[argc
- 1], "-?") == 0 ||
283 strcmp (argv
[argc
- 1], "--help") == 0)
285 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
291 for (i
= 0; i
< sizeof (idna
) / sizeof (idna
[0]); i
++)
294 printf ("IDNA entry %d: %s\n", i
, idna
[i
].name
);
299 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
302 rc
= idna_to_ascii (idna
[i
].in
, idna
[i
].inlen
, label
,
303 idna
[i
].allowunassigned
, idna
[i
].usestd3asciirules
);
304 if (rc
!= idna
[i
].toasciirc
)
306 fail ("IDNA entry %d failed: %d\n", i
, rc
);
312 if (debug
&& rc
== IDNA_SUCCESS
)
314 printf ("computed out: %s\n", label
);
315 printf ("expected out: %s\n", idna
[i
].out
);
318 printf ("returned %d expected %d\n", rc
, idna
[i
].toasciirc
);
320 if (rc
== IDNA_SUCCESS
)
322 if (strlen (idna
[i
].out
) != strlen (label
) ||
323 strcasecmp (idna
[i
].out
, label
) != 0)
325 fail ("IDNA entry %d failed\n", i
);
338 ucs4label
= stringprep_utf8_to_ucs4 (idna
[i
].out
, -1, &len
);
342 printf ("in: %s (%d==%d)\n", idna
[i
].out
, strlen (idna
[i
].out
),
344 ucs4print (ucs4label
, len
);
348 rc
= idna_to_unicode (ucs4label
, len
, tmp
, &len2
,
349 idna
[i
].allowunassigned
,
350 idna
[i
].usestd3asciirules
);
353 printf ("expected out:\n");
354 if (rc
== IDNA_SUCCESS
)
355 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
357 ucs4print (ucs4label
, len
);
359 printf ("computed out:\n");
360 ucs4print (tmp
, len2
);
363 if (rc
!= idna
[i
].tounicoderc
)
365 fail ("IDNA entry %d failed: %d\n", i
, rc
);
371 if ((rc
== IDNA_SUCCESS
&& (len2
!= idna
[i
].inlen
||
372 memcmp (idna
[i
].in
, tmp
, len2
) != 0)) ||
373 (rc
!= IDNA_SUCCESS
&& (len2
!= len
||
374 memcmp (ucs4label
, tmp
, len
) != 0)))
376 fail ("IDNA entry %d failed\n", i
);
388 printf ("IDNA self tests done with %d errors\n", error_count
);
390 return error_count
? 1 : 0;