1 /* tst_idna.c --- Self tests for idna_to_ascii().
2 * Copyright (C) 2002, 2003, 2004 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
31 #include <stringprep.h>
/* Test vectors consumed by the self-test loop further down.
 *
 * Entries are initialised positionally.  Where an entry is fully visible it
 * supplies, in order: a descriptive name string, the number of input code
 * points, the input code points themselves, the expected ASCII output
 * (IDNA_ACE_PREFIX concatenated with a string literal), a flags value (0 in
 * every visible entry) and the expected return codes.
 *
 * The loop reads these through .name, .inlen, .in, .out, .flags, .toasciirc
 * and .tounicoderc.
 * NOTE(review): struct idna is declared outside this view, so the mapping of
 * initialiser position to member name is presumed from that usage -- confirm
 * against the struct declaration.
 */
47 static const struct idna idna
[] = {
49 "Arabic (Egyptian)", 17,
51 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
52 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
54 IDNA_ACE_PREFIX
"egbpdaj6bu4bxfgehfvwxn", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
56 "Chinese (simplified)", 9,
58 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587},
59 IDNA_ACE_PREFIX
"ihqwcrb4cv8a8dqg056pqjye", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
61 "Chinese (traditional)", 9,
63 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587},
64 IDNA_ACE_PREFIX
"ihqwctvzc91f659drss3x8bo0yb", 0, IDNA_SUCCESS
,
67 "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky", 22,
69 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
70 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
71 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079},
72 IDNA_ACE_PREFIX
"Proprostnemluvesky-uyb24dma41a", 0, IDNA_SUCCESS
,
77 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
78 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
79 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA},
80 IDNA_ACE_PREFIX
"4dbcagdahymbxekheh6e0a7fei0b", 0, IDNA_SUCCESS
,
83 "Hindi (Devanagari)", 30,
85 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
86 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
87 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
88 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902},
89 IDNA_ACE_PREFIX
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0,
92 "Japanese (kanji and hiragana)", 18,
94 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
95 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
97 IDNA_ACE_PREFIX
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, IDNA_SUCCESS
},
99 "Korean (Hangul syllables)", 24,
101 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
102 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
103 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C},
104 IDNA_ACE_PREFIX
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt"
105 "30a5jpsd879ccm6fea98c", 0, IDNA_PUNYCODE_ERROR
, IDNA_PUNYCODE_ERROR
},
106 /* too long output -- presumably refers to the Korean entry directly above,
   the only neighbouring entry whose expected codes are errors */
108 "Russian (Cyrillic)", 28,
110 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
111 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
112 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
113 0x0441, 0x0441, 0x043A, 0x0438},
114 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
115 IDNA_SUCCESS
, IDNA_SUCCESS
},
117 "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol", 40,
119 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
120 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
121 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
122 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
123 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C},
124 IDNA_ACE_PREFIX
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0,
129 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
130 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
131 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
132 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074},
133 IDNA_ACE_PREFIX
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0,
136 "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]", 8,
138 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F},
139 IDNA_ACE_PREFIX
"3B-ww4c5e180e575a65lsy2b", 0, IDNA_SUCCESS
,
142 "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS", 24,
144 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
145 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
146 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053},
147 IDNA_ACE_PREFIX
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0,
150 "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]", 25,
152 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
153 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
154 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
156 IDNA_ACE_PREFIX
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0,
159 "Japanese [HITOTSU][YANE][NO][SHITA]2", 8,
161 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032},
162 IDNA_ACE_PREFIX
"2-u9tlzr9756bt3uc0v", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
164 "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]", 13,
166 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
167 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D},
168 IDNA_ACE_PREFIX
"MajiKoi5-783gue6qz075azm5e", 0, IDNA_SUCCESS
,
171 "Japanese [PAFII]de[RUNBA]", 9,
173 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0},
174 IDNA_ACE_PREFIX
"de-jg4avhby1noc0d", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
176 "Japanese [SONO][SUPIIDO][DE]", 7,
178 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067},
179 IDNA_ACE_PREFIX
"d9juau41awczczp", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
183 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac},
184 IDNA_ACE_PREFIX
"hxargifdar", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
186 "Maltese (Malti)", 10,
188 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
190 IDNA_ACE_PREFIX
"bonusaa-5bb1da", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
192 "Russian (Cyrillic)", 28,
194 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
195 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
196 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
197 0x0441, 0x0441, 0x043a, 0x0438},
198 IDNA_ACE_PREFIX
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0,
199 IDNA_SUCCESS
, IDNA_SUCCESS
},
202 "(S) -> $1.00 <-", 11,
204 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
205 0x0020, 0x003C, 0x002D},
206 IDNA_ACE_PREFIX
"-> $1.00 <--", 0, IDNA_SUCCESS
, IDNA_SUCCESS
},
208 { /* XXX depends on IDNA_ACE_PREFIX */
209 "ToASCII() with ACE prefix", 4 + 3,
211 'x', 'n', '-', '-', 'f', 'o', 0x3067},
212 IDNA_ACE_PREFIX
"too long too long too long too long too long too "
213 "long too long too long too long too long ", 0,
214 IDNA_CONTAINS_ACE_PREFIX
, IDNA_PUNYCODE_ERROR
}
/* Pointer to a UCS-4 label.  It starts out NULL and is assigned further down
   from stringprep_utf8_to_ucs4 (). */
221 uint32_t *ucs4label
= NULL
;
/* Visit every element of the idna table.  The bound is derived with sizeof,
   so it follows the table's length automatically. */
226 for (i
= 0; i
< sizeof (idna
) / sizeof (idna
[0]); i
++)
/* Announce the entry by index and name. */
229 printf ("IDNA entry %d: %s\n", i
, idna
[i
].name
);
/* Hand the entry's input code points and their count to ucs4print ()
   (a helper defined outside this view). */
234 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
/* Run idna_to_ascii_4i () on the entry's input code points with the entry's
   flags; label is passed as the output argument. */
237 rc
= idna_to_ascii_4i (idna
[i
].in
, idna
[i
].inlen
, label
, idna
[i
].flags
);
/* The return code has to equal the entry's expected toasciirc; otherwise the
   entry is reported through fail (). */
238 if (rc
!= idna
[i
].toasciirc
)
240 fail ("IDNA entry %d failed: %d\n", i
, rc
);
/* Diagnostics: computed versus expected output string, and returned versus
   expected return code. */
246 if (debug
&& rc
== IDNA_SUCCESS
)
248 printf ("computed out: %s\n", label
);
249 printf ("expected out: %s\n", idna
[i
].out
);
252 printf ("returned %d expected %d\n", rc
, idna
[i
].toasciirc
);
/* On success the produced label must match the expected output: identical
   length and equal under strcasecmp (), i.e. compared without regard to
   case. */
254 if (rc
== IDNA_SUCCESS
)
256 if (strlen (idna
[i
].out
) != strlen (label
) ||
257 strcasecmp (idna
[i
].out
, label
) != 0)
259 fail ("IDNA entry %d failed\n", i
);
/* Reverse direction: turn the entry's expected ASCII output into UCS-4 code
   points.  len is passed by address to receive a length from the call. */
272 ucs4label
= stringprep_utf8_to_ucs4 (idna
[i
].out
, -1, &len
);
/* NOTE(review): strlen () yields a size_t but is printed with %d here; the
   conversion specifier and argument type do not match -- consider %zu or an
   explicit cast. */
276 printf ("in: %s (%d==%d)\n", idna
[i
].out
, strlen (idna
[i
].out
),
278 ucs4print (ucs4label
, len
);
/* Initialise len2 to the number of elements tmp can hold before handing its
   address to idna_to_unicode_44i (). */
281 len2
= sizeof (tmp
) / sizeof (tmp
[0]);
/* Convert the UCS-4 label with the entry's flags; tmp is the output buffer
   and len2 is passed by address alongside it. */
282 rc
= idna_to_unicode_44i (ucs4label
, len
, tmp
, &len2
, idna
[i
].flags
);
/* Diagnostics: the expected length is the entry's inlen on success and len
   otherwise; both the entry's input and ucs4label are dumped via
   ucs4print (), followed by the computed buffer. */
285 printf ("expected out (%d):\n",
286 rc
== IDNA_SUCCESS
? idna
[i
].inlen
: len
);
287 if (rc
== IDNA_SUCCESS
)
288 ucs4print (idna
[i
].in
, idna
[i
].inlen
);
290 ucs4print (ucs4label
, len
);
292 printf ("computed out (%d):\n", len2
);
293 ucs4print (tmp
, len2
);
/* The return code has to equal the entry's expected tounicoderc; otherwise
   the entry is reported through fail (). */
296 if (rc
!= idna
[i
].tounicoderc
)
298 fail ("IDNA entry %d failed: %d\n", i
, rc
);
/* Round-trip comparison of the idna_to_unicode_44i () result in tmp:
     - rc == IDNA_SUCCESS: len2 must equal the entry's inlen and tmp must
       compare equal to the entry's input;
     - otherwise: len2 must equal len and tmp must compare equal to
       ucs4label.
   NOTE(review): memcmp () takes a size in bytes, but len2 and len are passed
   unscaled.  ucs4label is a uint32_t pointer and len2 was initialised as an
   element count, so if these are element counts only about a quarter of the
   data is actually compared -- confirm, and consider multiplying by the
   element size. */
304 if ((rc
== IDNA_SUCCESS
&& (len2
!= idna
[i
].inlen
||
305 memcmp (idna
[i
].in
, tmp
, len2
) != 0)) ||
306 (rc
!= IDNA_SUCCESS
&& (len2
!= len
||
307 memcmp (ucs4label
, tmp
, len
) != 0)))
/* Diagnostics for a mismatch.
   NOTE(review): in the first printf the value shown under "len=" is len2 and
   the value shown under "len2=" is the entry's inlen, so the labels do not
   match the arguments. */
311 if (rc
== IDNA_SUCCESS
)
312 printf ("len=%d len2=%d\n", len2
, idna
[i
].inlen
);
314 printf ("len=%d len2=%d\n", len
, len2
);
/* Report the entry as failed. */
316 fail ("IDNA entry %d failed\n", i
);