Use valid HTML.
[libidn.git] / tst_stringprep.c
blob1397037689023e1a3565398b9aceff4f2e933d50
/* tst_stringprep.c	Self tests for stringprep().
 * Copyright (C) 2002, 2003  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
22 #include "internal.h"
/* Non-zero when verbose output was requested (-v / --verbose). */
static int debug;
/* Number of failures reported through fail() so far. */
static int error_count;
/* Non-zero when the first failure should terminate the program
   (-b / --break-on-error). */
static int break_on_error;
28 static void
29 fail (const char *format, ...)
31 va_list arg_ptr;
33 va_start (arg_ptr, format);
34 vfprintf (stderr, format, arg_ptr);
35 va_end (arg_ptr);
36 error_count++;
37 if (break_on_error)
38 exit (1);
/* Print the LEN bytes at STR to stdout as a quoted, C-escaped string.

   ASCII letters, digits, space and '.' are printed literally; every
   other byte is printed as \xNN.  The quoted text is broken into
   chunks of 16 bytes, one per output line, and followed by the total
   length in bytes.  */
static void
escapeprint (char *str, int len)
{
  int pos;

  putchar ('`');
  for (pos = 0; pos < len; pos++)
    {
      /* Mask to 0..255 so that a signed char never yields a negative value.  */
      int ch = str[pos] & 0xFF;
      int printable = (ch >= 'A' && ch <= 'Z')
        || (ch >= 'a' && ch <= 'z')
        || (ch >= '0' && ch <= '9')
        || ch == ' ' || ch == '.';

      if (printable)
        putchar (ch);
      else
        printf ("\\x%02x", ch);

      /* Close the quote and start a new line after every 16th byte,
         except after the very last byte.  */
      if ((pos + 1) % 16 == 0 && pos + 1 < len)
        fputs ("'\n\t'", stdout);
    }
  printf ("' (length %d bytes)\n", len);
}
/* Print the LEN bytes at STR to stdout as two-digit hexadecimal values.

   Output is prefixed with "\t;; ", an extra space is inserted after
   every 8 bytes, and a new prefixed line is started after every 16
   bytes.  No trailing newline is written.  */
static void
hexprint (char *str, int len)
{
  int done;			/* Number of bytes printed so far.  */

  fputs ("\t;; ", stdout);
  for (done = 1; done <= len; done++)
    {
      printf ("%02x ", str[done - 1] & 0xFF);

      if (done % 8 == 0)
        putchar (' ');
      if (done % 16 == 0 && done < len)
        fputs ("\n\t;; ", stdout);
    }
}
/* Print the LEN bytes at STR to stdout as groups of eight binary digits,
   most significant bit first.

   Output is prefixed with "\t;; ", an extra space is inserted after
   every 3 bytes, and a new prefixed line is started after every 6
   bytes.  No trailing newline is written.  */
static void
binprint (char *str, int len)
{
  int idx;

  fputs ("\t;; ", stdout);
  for (idx = 0; idx < len; idx++)
    {
      int byte = str[idx] & 0xFF;
      int mask;

      /* Walk the bits from 0x80 down to 0x01.  */
      for (mask = 0x80; mask != 0; mask >>= 1)
        putchar ((byte & mask) ? '1' : '0');
      putchar (' ');

      if ((idx + 1) % 3 == 0)
        putchar (' ');
      if ((idx + 1) % 6 == 0 && idx + 1 < len)
        fputs ("\n\t;; ", stdout);
    }
}
101 struct stringprep
103 char *comment;
104 char *in;
105 char *out;
106 char *profile;
107 int flags;
108 int rc;
110 strprep[] =
113 "Map to nothing",
114 "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B"
115 "bar""\xE2\x80\x8B\xE2\x81\xA0""baz\xEF\xB8\x80\xEF\xB8\x88"
116 "\xEF\xB8\x8F\xEF\xBB\xBF", "foobarbaz"
119 "Case folding ASCII U+0043 U+0041 U+0046 U+0045",
120 "CAFE", "cafe"
123 "Case folding 8bit U+00DF (german sharp s)",
124 "\xC3\xDF", "ss"
127 "Case folding U+0130 (turkish capital I with dot)",
128 "\xC4\xB0", "i\xcc\x87"
131 "Case folding multibyte U+0143 U+037A",
132 "\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9"
135 "Case folding U+2121 U+33C6 U+1D7BB",
136 "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB",
137 "telc\xE2\x88\x95""kg\xCF\x83"
140 "Normalization of U+006a U+030c U+00A0 U+00AA",
141 "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a"
144 "Case folding U+1FB7 and normalization",
145 "\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9"
148 "Self-reverting case folding U+01F0 and normalization",
149 "\xC7\xF0", "\xC7\xB0"
152 "Self-reverting case folding U+0390 and normalization",
153 "\xCE\x90", "\xCE\x90"
156 "Self-reverting case folding U+03B0 and normalization",
157 "\xCE\xB0", "\xCE\xB0"
160 "Self-reverting case folding U+1E96 and normalization",
161 "\xE1\xBA\x96", "\xE1\xBA\x96"
164 "Self-reverting case folding U+1F56 and normalization",
165 "\xE1\xBD\x96", "\xE1\xBD\x96"
168 "ASCII space character U+0020",
169 "\x20", "\x20"
172 "Non-ASCII 8bit space character U+00A0",
173 "\xC2\xA0", "\x20"
176 "Non-ASCII multibyte space character U+1680",
177 "\xE1\x9A\x80", NULL, "Nameprep", 0,
178 STRINGPREP_CONTAINS_PROHIBITED
181 "Non-ASCII multibyte space character U+2000",
182 "\xE2\x80\x80", "\x20"
185 "Zero Width Space U+200b",
186 "\xE2\x80\x8b", ""
189 "Non-ASCII multibyte space character U+3000",
190 "\xE3\x80\x80", "\x20"
193 "ASCII control characters U+0010 U+007F",
194 "\x10\x7F", "\x10\x7F"
197 "Non-ASCII 8bit control character U+0085",
198 "\xC2\x85", NULL, "Nameprep", 0,
199 STRINGPREP_CONTAINS_PROHIBITED
202 "Non-ASCII multibyte control character U+180E",
203 "\xE1\xA0\x8E", NULL, "Nameprep", 0,
204 STRINGPREP_CONTAINS_PROHIBITED
207 "Zero Width No-Break Space U+FEFF",
208 "\xEF\xBB\xBF", ""
211 "Non-ASCII control character U+1D175",
212 "\xF0\x9D\x85\xB5", NULL, "Nameprep", 0,
213 STRINGPREP_CONTAINS_PROHIBITED
216 "Plane 0 private use character U+F123",
217 "\xEF\x84\xA3", NULL, "Nameprep", 0,
218 STRINGPREP_CONTAINS_PROHIBITED
221 "Plane 15 private use character U+F1234",
222 "\xF3\xB1\x88\xB4", NULL, "Nameprep", 0,
223 STRINGPREP_CONTAINS_PROHIBITED
226 "Plane 16 private use character U+10F234",
227 "\xF4\x8F\x88\xB4", NULL, "Nameprep", 0,
228 STRINGPREP_CONTAINS_PROHIBITED
231 "Non-character code point U+8FFFE",
232 "\xF2\x8F\xBF\xBE", NULL, "Nameprep", 0,
233 STRINGPREP_CONTAINS_PROHIBITED
236 "Non-character code point U+10FFFF",
237 "\xF4\x8F\xBF\xBF", NULL, "Nameprep", 0,
238 STRINGPREP_CONTAINS_PROHIBITED
241 "Surrogate code U+DF42",
242 "\xED\xBD\x82", NULL, "Nameprep", 0,
243 STRINGPREP_CONTAINS_PROHIBITED
246 "Non-plain text character U+FFFD",
247 "\xEF\xBF\xBD", NULL, "Nameprep", 0,
248 STRINGPREP_CONTAINS_PROHIBITED
251 "Ideographic description character U+2FF5",
252 "\xE2\xBF\xB5", NULL, "Nameprep", 0,
253 STRINGPREP_CONTAINS_PROHIBITED
256 "Display property character U+0341",
257 "\xCD\x81", "\xCC\x81"
260 "Left-to-right mark U+200E",
261 "\xE2\x80\x8E", "\xCC\x81", "Nameprep", 0,
262 STRINGPREP_CONTAINS_PROHIBITED
265 "Deprecated U+202A",
266 "\xE2\x80\xAA", "\xCC\x81", "Nameprep", 0,
267 STRINGPREP_CONTAINS_PROHIBITED
270 "Language tagging character U+E0001",
271 "\xF3\xA0\x80\x81", "\xCC\x81", "Nameprep", 0,
272 STRINGPREP_CONTAINS_PROHIBITED
275 "Language tagging character U+E0042",
276 "\xF3\xA0\x81\x82", NULL, "Nameprep", 0,
277 STRINGPREP_CONTAINS_PROHIBITED
280 "Bidi: RandALCat character U+05BE and LCat characters",
281 "foo\xD6\xBE""bar", NULL, "Nameprep", 0,
282 STRINGPREP_BIDI_BOTH_L_AND_RAL
285 "Bidi: RandALCat character U+FD50 and LCat characters",
286 "foo\xEF\xB5\x90""bar", NULL, "Nameprep", 0,
287 STRINGPREP_BIDI_BOTH_L_AND_RAL
290 "Bidi: RandALCat character U+FB38 and LCat characters",
291 "foo\xEF\xB9\xB6""bar", "foo \xd9\x8e""bar"
293 { "Bidi: RandALCat without trailing RandALCat U+0627 U+0031",
294 "\xD8\xA7\x31", NULL, "Nameprep", 0,
295 STRINGPREP_BIDI_LEADTRAIL_NOT_RAL}
298 "Bidi: RandALCat character U+0627 U+0031 U+0628",
299 "\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8"
302 "Unassigned code point U+E0002",
303 "\xF3\xA0\x80\x82", NULL, "Nameprep", STRINGPREP_NO_UNASSIGNED,
304 STRINGPREP_CONTAINS_UNASSIGNED
307 "Larger test (shrinking)",
308 "X\xC2\xAD\xC3\xDF\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2"
309 "\xaa\xce\xb0\xe2\x80\x80", "xssi\xcc\x87""tel\xc7\xb0 a\xce\xb0 ",
310 "Nameprep"
313 "Larger test (expanding)",
314 "X\xC3\xDF\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
315 "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88"
316 "\xe3\x83\xab""i\xcc\x87""tel\x28""d\x29\xe3\x82\xa2\xe3\x83\x91"
317 "\xe3\x83\xbc\xe3\x83\x88"
319 #if !defined(DRAFT)
320 { "Test of prohibited ASCII character U+0020",
321 "\x20", NULL, "generic", 0, STRINGPREP_CONTAINS_PROHIBITED
324 "Test of NFKC U+00A0 and prohibited character U+0020",
325 "\xC2\xA0", NULL, "generic", 0, STRINGPREP_CONTAINS_PROHIBITED
327 { "Case map + normalization",
328 "\xC2\xB5", "\xCE\xBC", "generic"},
329 /* The rest are rather non-interesting, but no point in removing
330 working test cases... */
331 { "case_nonfkc",
332 "\xC2\xB5", "\xCE\xBC", "generic", STRINGPREP_NO_NFKC}
334 { "NFKC test",
335 "\xC2\xAA", "\x61", "generic"},
336 { "nameprep, exposed a bug in libstringprep 0.0.5",
337 "\xC2\xAA\x0A", "\x61\x0A"},
338 { "unassigned code point U+0221",
339 "\xC8\xA1", "\xC8\xA1", "generic"},
340 { "Unassigned code point U+0221",
341 "\xC8\xA1", NULL, "generic", STRINGPREP_NO_UNASSIGNED,
342 STRINGPREP_CONTAINS_UNASSIGNED},
343 { "Unassigned code point U+0236",
344 "\xC8\xB6", "\xC8\xB6", "generic"},
345 { "unassigned code point U+0236",
346 "\xC8\xB6", NULL, "generic", STRINGPREP_NO_UNASSIGNED,
347 STRINGPREP_CONTAINS_UNASSIGNED},
348 { "bidi both RandALCat and LCat U+0627 U+00AA U+0628",
349 "\xD8\xA7\xC2\xAA\xD8\xA8", NULL, "generic", 0,
350 STRINGPREP_BIDI_BOTH_L_AND_RAL}
352 { "XMPP node profile prohibited output",
353 "foo@bar", NULL, "Nodeprep", 0,
354 STRINGPREP_CONTAINS_PROHIBITED},
355 { "XMPP resource profile on same string should work though",
356 "foo@bar", "foo@bar", "Resourceprep"},
357 { "SASL ANONYMOUS plain mechanism",
358 "simon@josefsson.org", "simon@josefsson.org", "plain"},
359 { "iSCSI profile",
360 "Example-Name", "example-name", "ISCSIprep"},
361 { "SASL profile",
362 "Example\xC2\xA0""Name", "Example Name", "SASLprep"}
363 #endif
367 main (int argc, char *argv[])
369 char *p;
370 int rc;
371 size_t i;
373 if (!stringprep_check_version (STRINGPREP_VERSION))
374 fail ("stringprep_check_version() failed\n");
377 if (strcmp (argv[argc - 1], "-v") == 0 ||
378 strcmp (argv[argc - 1], "--verbose") == 0)
379 debug = 1;
380 else if (strcmp (argv[argc - 1], "-b") == 0 ||
381 strcmp (argv[argc - 1], "--break-on-error") == 0)
382 break_on_error = 1;
383 else if (strcmp (argv[argc - 1], "-h") == 0 ||
384 strcmp (argv[argc - 1], "-?") == 0 ||
385 strcmp (argv[argc - 1], "--help") == 0)
387 printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n",
388 argv[0]);
389 return 1;
391 while (argc-- > 1);
393 for (i = 0; i < sizeof (strprep) / sizeof (strprep[0]); i++)
395 #ifdef DRAFT
396 printf("<section title=\"%s.\">\n", strprep[i].comment);
397 printf("\n");
398 printf("<figure>\n");
399 printf("<artwork>\n");
400 printf ("in: ");
401 escapeprint (strprep[i].in, strlen (strprep[i].in));
402 #endif
403 if (debug)
404 printf ("STRINGPREP entry %d\n", i);
406 if (debug)
408 printf ("flags: %d\n", strprep[i].flags);
410 printf ("in: ");
411 escapeprint (strprep[i].in, strlen (strprep[i].in));
412 hexprint (strprep[i].in, strlen (strprep[i].in));
413 puts ("");
414 binprint (strprep[i].in, strlen (strprep[i].in));
415 puts ("");
418 rc = stringprep_profile (strprep[i].in, &p,
419 strprep[i].profile ?
420 strprep[i].profile :
421 "Nameprep",
422 strprep[i].flags);
423 if (rc != strprep[i].rc)
425 fail ("stringprep() entry %d failed: %d\n", i, rc);
426 if (debug)
427 printf ("FATAL\n");
428 continue;
431 #ifdef DRAFT
432 if (rc == STRINGPREP_OK)
434 printf ("out: ");
435 escapeprint (p, strlen (p));
437 #endif
439 if (debug && rc == STRINGPREP_OK)
441 printf ("out: ");
442 escapeprint (p, strlen (p));
443 hexprint (p, strlen (p));
444 puts ("");
445 binprint (p, strlen (p));
446 puts ("");
448 printf ("expected out: ");
449 escapeprint (strprep[i].out, strlen (strprep[i].out));
450 hexprint (strprep[i].out, strlen (strprep[i].out));
451 puts ("");
452 binprint (strprep[i].out, strlen (strprep[i].out));
453 puts ("");
455 else if (debug)
456 printf ("returned %d expected %d\n", rc, strprep[i].rc);
458 if (rc == STRINGPREP_OK)
460 if (strlen (strprep[i].out) != strlen (p) ||
461 memcmp (strprep[i].out, p, strlen (p)) != 0)
463 fail ("stringprep() entry %d failed\n", i);
464 if (debug)
465 printf ("ERROR\n");
467 else if (debug)
468 printf ("OK\n\n");
470 free (p);
472 else if (debug)
473 printf ("OK\n\n");
475 #ifdef DRAFT
476 printf("</artwork>\n");
477 printf("</figure>\n");
478 printf("\n");
479 printf("</section>\n");
480 #endif
483 #if 0
484 memset (p, 0, 10);
485 stringprep_unichar_to_utf8 (0x3316, p);
486 hexprint (p, strlen (p));
487 puts ("");
488 #endif
490 if (debug)
491 printf ("Stringprep self tests done with %d errors\n", error_count);
493 return error_count ? 1 : 0;