Updated to fedora-glibc-20060106T2148
[glibc.git] / posix / tst-rxspencer.c
bloba68bab2de980f492240f1ced5cc6d01df4a5b966
1 /* Regular expression tests.
2 Copyright (C) 2003, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <sys/types.h>
22 #include <mcheck.h>
23 #include <regex.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <locale.h>
28 #include <getopt.h>
/* Expand the escape letters used by the test file, in place: 'N'
   becomes a newline, 'T' a tab, 'S' a space and 'Z' a NUL byte.
   Scanning resumes right behind every replaced byte, so text that
   follows a 'Z' (up to the string's original terminator) is still
   processed.  */
static void
replace_special_chars (char *str)
{
  char *hit = strpbrk (str, "NTSZ");

  while (hit != NULL)
    {
      if (*hit == 'N')
        *hit = '\n';
      else if (*hit == 'T')
        *hit = '\t';
      else if (*hit == 'S')
        *hit = ' ';
      else
        /* strpbrk only stops on N, T, S or Z, so this is 'Z'.  */
        *hit = '\0';

      hit = strpbrk (hit + 1, "NTSZ");
    }
}
/* Rewrite STR in place, turning every [[:<:]] into \< and every
   [[:>:]] into \>.  Other text, including other "[[:" sequences,
   is left alone.  The string only ever shrinks.  */
static void
glibc_re_syntax (char *str)
{
  /* One past the terminating NUL, so that the moves below carry the
     terminator along with the tail.  */
  char *limit = str + strlen (str) + 1;
  char *cur = strstr (str, "[[:");

  while (cur != NULL)
    {
      int is_boundary = ((cur[3] == '<' || cur[3] == '>')
                         && strncmp (cur + 4, ":]]", 3) == 0);

      if (!is_boundary)
        /* Some other bracket expression; step over the "[[:".  */
        cur += 3;
      else
        {
          /* Seven bytes collapse into two: copy the direction
             character before the tail is shifted over it.  */
          cur[0] = '\\';
          cur[1] = cur[3];
          memmove (cur + 2, cur + 7, limit - cur - 7);
          limit -= 5;
          cur += 2;
        }

      cur = strstr (cur, "[[:");
    }
}
/* Store at DST the two-byte UTF-8 sequence that stands in for the
   ASCII letter C and return the position just past it.  For any
   byte other than a-d or A-D nothing is stored and DST is returned
   unchanged.  */
static char *
mb_replace (char *dst, const char c)
{
  static const struct
  {
    char ascii;
    char utf8[2];
  } subst[] =
    {
      /* a -> \'a and A -> \'A.  */
      { 'a', { '\xc3', '\xa1' } },
      { 'A', { '\xc3', '\x81' } },
      /* b -> \v{c} and B -> \v{C}.  */
      { 'b', { '\xc4', '\x8d' } },
      { 'B', { '\xc4', '\x8c' } },
      /* c -> \v{d} and C -> \v{D}.  */
      { 'c', { '\xc4', '\x8f' } },
      { 'C', { '\xc4', '\x8e' } },
      /* d -> \'e and D -> \'E.  */
      { 'd', { '\xc3', '\xa9' } },
      { 'D', { '\xc3', '\x89' } },
    };
  size_t k;

  for (k = 0; k < sizeof subst / sizeof subst[0]; ++k)
    if (subst[k].ascii == c)
      {
        *dst++ = subst[k].utf8[0];
        *dst++ = subst[k].utf8[1];
        break;
      }

  return dst;
}
/* Return a freshly malloc'ed copy of STR in which every byte that
   occurs in LETTERS has been passed through mb_replace; all other
   bytes are copied verbatim.  Returns NULL if STR is NULL or if the
   allocation fails.  The caller frees the result.  */
static char *
mb_frob_string (const char *str, const char *letters)
{
  char *out, *wr;

  if (str == NULL)
    return NULL;

  /* Each input byte yields at most two output bytes.  */
  out = malloc (2 * strlen (str) + 1);
  if (out == NULL)
    return NULL;

  wr = out;
  while (*str != '\0')
    {
      const char ch = *str++;

      if (strchr (letters, ch) != NULL)
        wr = mb_replace (wr, ch);
      else
        *wr++ = ch;
    }
  *wr = '\0';

  return out;
}
/* Pattern-aware variant of mb_frob_string: bytes found in LETTERS
   are passed through mb_replace, except when they sit between
   [: and :], [. and .] or [= and =], or directly follow a backslash.
   Returns a malloc'ed string the caller frees, or NULL if STR is
   NULL or the allocation fails.  */

static char *
mb_frob_pattern (const char *str, const char *letters)
{
  char *out, *wr;
  const char *rd;
  int in_class = 0, escaped = 0;

  if (str == NULL)
    return NULL;

  /* Each input byte yields at most two output bytes.  */
  out = malloc (2 * strlen (str) + 1);
  if (out == NULL)
    return NULL;

  wr = out;
  for (rd = str; *rd != '\0'; ++rd)
    {
      const char ch = *rd;

      if (ch == '\\')
        {
          /* A backslash toggles the state, so "\\\\" leaves the
             following byte unescaped.  */
          escaped = !escaped;
          *wr++ = ch;
          continue;
        }

      if (escaped)
        {
          /* The byte after a backslash is copied untouched.  */
          escaped = 0;
          *wr++ = ch;
          continue;
        }

      if (!in_class && strchr (letters, ch) != NULL)
        {
          wr = mb_replace (wr, ch);
          continue;
        }

      /* Track entering and leaving [: :], [. .] and [= =].  */
      if (!in_class)
        {
          if (ch == '[' && strchr (":.=", rd[1]) != NULL)
            in_class = 1;
        }
      else if (ch == ']' && strchr (":.=", rd[-1]) != NULL)
        in_class = 0;

      *wr++ = ch;
    }
  *wr = '\0';

  return out;
}
/* Compare the match recorded in RM[IDX] with the expectation MATCH
   and return 0 if they agree, 1 (after printing a message prefixed
   with FAIL) if they don't.  STRING is the text regexec was run on.

   MATCH is one of:
     "-"      the (sub)expression must not have matched at all;
     "@TEXT"  it must have matched the empty string at a position
              where STRING continues with TEXT (a bare "@" means
              the position of a NUL byte);
     TEXT     it must have matched exactly TEXT.  */
static int
check_match (regmatch_t *rm, int idx, const char *string,
             const char *match, const char *fail)
{
  const regmatch_t *m = &rm[idx];

  if (strcmp (match, "-") == 0)
    {
      if (m->rm_so != -1 || m->rm_eo != -1)
        {
          printf ("%s rm[%d] unexpectedly matched\n", fail, idx);
          return 1;
        }
      return 0;
    }

  if (m->rm_so == -1 || m->rm_eo == -1)
    {
      printf ("%s rm[%d] unexpectedly did not match\n", fail, idx);
      return 1;
    }

  if (match[0] == '@')
    {
      const char *follow = match + 1;
      size_t follow_len = strlen (follow);

      if (m->rm_so != m->rm_eo)
        {
          printf ("%s rm[%d] not empty\n", fail, idx);
          return 1;
        }

      /* With no text after '@' still compare one byte, which then
         has to be the NUL terminating FOLLOW.  */
      if (follow_len == 0)
        follow_len = 1;

      if (strncmp (string + m->rm_so, follow, follow_len) != 0)
        {
          printf ("%s rm[%d] not matching %s\n", fail, idx, match);
          return 1;
        }
      return 0;
    }

  {
    size_t span = m->rm_eo - m->rm_so;

    if (span != strlen (match)
        || strncmp (string + m->rm_so, match, span) != 0)
      {
        printf ("%s rm[%d] not matching %s\n", fail, idx, match);
        return 1;
      }
  }

  return 0;
}
/* Run a single test case and return 0 on success, 1 on failure.
   Failures are reported on stdout, prefixed with FAIL.

   PATTERN is compiled with CFLAGS.  If EFLAGS is -1 the compilation
   is expected to fail and STRING names the expected error code
   without its REG_ prefix.  Otherwise STRING is matched with EFLAGS;
   a NULL EXPECT means no match is expected, else EXPECT describes the
   overall match and MATCHES (optional) is a comma-separated list
   describing subexpressions 1 through 9, both in the notation
   understood by check_match.  MATCHES is modified temporarily while
   it is being split but is restored before returning.  */
static int
test (const char *pattern, int cflags, const char *string, int eflags,
      char *expect, char *matches, const char *fail)
{
  regex_t re;
  regmatch_t rm[10];
  int rc, n, ret = 0;

  rc = regcomp (&re, pattern, cflags);
  if (rc != 0)
    {
      char buf[500];

      if (eflags == -1)
        {
#define C(x) { REG_##x, #x }
          static struct { reg_errcode_t code; const char *name; } codes []
            = { C(NOERROR), C(NOMATCH), C(BADPAT), C(ECOLLATE),
                C(ECTYPE), C(EESCAPE), C(ESUBREG), C(EBRACK),
                C(EPAREN), C(EBRACE), C(BADBR), C(ERANGE),
                C(ESPACE), C(BADRPT) };
          size_t k;

          /* Translate the numeric error into its name and compare
             that with what the test file asked for.  */
          for (k = 0; k < sizeof (codes) / sizeof (codes[0]); ++k)
            {
              if (rc != codes[k].code)
                continue;
              if (strcmp (string, codes[k].name) == 0)
                return 0;
              printf ("%s regcomp returned REG_%s (expected REG_%s)\n",
                      fail, codes[k].name, string);
              return 1;
            }

          printf ("%s regcomp return value REG_%d\n", fail, rc);
          return 1;
        }

      regerror (rc, &re, buf, sizeof (buf));
      printf ("%s regcomp failed: %s\n", fail, buf);
      return 1;
    }

  if (eflags == -1)
    {
      regfree (&re);

      /* The test case file assumes something only guaranteed by the
         rxspencer regex implementation.  Namely that for empty
         expressions regcomp() return REG_EMPTY.  This is not the case
         for us and so we ignore this error.  */
      if (strcmp (string, "EMPTY") == 0)
        return 0;

      printf ("%s regcomp unexpectedly succeeded\n", fail);
      return 1;
    }

  /* The compiled pattern is not needed once regexec has filled RM.  */
  rc = regexec (&re, string, 10, rm, eflags);
  regfree (&re);

  if (rc != 0)
    {
      if (expect == NULL)
        return 0;
      printf ("%s regexec failed\n", fail);
      return 1;
    }

  if (expect == NULL)
    {
      printf ("%s regexec unexpectedly succeeded\n", fail);
      return 1;
    }

  /* Without subexpression reporting there is nothing to compare.  */
  if (cflags & REG_NOSUB)
    return 0;

  ret = check_match (rm, 0, string, expect, fail);
  if (matches == NULL)
    return ret;

  for (n = 1; ret == 0 && n < 10; ++n)
    {
      char *comma;

      if (matches == NULL)
        {
          /* The list ran out: remaining subexpressions must be unset.  */
          ret = check_match (rm, n, string, "-", fail);
          continue;
        }

      comma = strchr (matches, ',');
      if (comma == NULL)
        {
          /* Last list element.  */
          ret = check_match (rm, n, string, matches, fail);
          matches = NULL;
          continue;
        }

      /* Cut out the current element, check it, then undo the cut.  */
      *comma = '\0';
      ret = check_match (rm, n, string, matches, fail);
      *comma = ',';
      matches = comma + 1;
    }

  return ret;
}
/* Run test () on multibyte variants of one test case: PATTERN is
   converted with mb_frob_pattern, and STRING, EXPECT and MATCHES with
   mb_frob_string, each replacing the bytes listed in LETTERS.  When
   EFLAGS is -1 STRING is handed on unconverted.  FAIL is the prefix
   used in failure messages.

   Returns the result of test (), or 1 if one of the required
   conversions failed.  All converted copies are freed before
   returning.  */
static int
mb_test (const char *pattern, int cflags, const char *string, int eflags,
         char *expect, const char *matches, const char *letters,
         const char *fail)
{
  char *pattern_mb = mb_frob_pattern (pattern, letters);
  const char *string_mb
    = eflags == -1 ? string : mb_frob_string (string, letters);
  char *expect_mb = mb_frob_string (expect, letters);
  char *matches_mb = mb_frob_string (matches, letters);
  int ret = 0;

  /* EXPECT and MATCHES may legitimately be NULL, in which case their
     converted copies are NULL as well; only a NULL result for a
     non-NULL input is an error.  */
  if (!pattern_mb || !string_mb
      || (expect && !expect_mb) || (matches && !matches_mb))
    {
      /* %m is the glibc conversion that prints strerror (errno).
         End the line like every other failure message does, so the
         report does not run into whatever is printed next.  */
      printf ("%s %m\n", fail);
      ret = 1;
    }
  else
    ret = test (pattern_mb, cflags, string_mb, eflags, expect_mb,
                matches_mb, fail);

  free (matches_mb);
  free (expect_mb);
  /* STRING_MB aliases the caller's STRING when EFLAGS is -1.  */
  if (string_mb != string)
    free ((char *) string_mb);
  free (pattern_mb);
  return ret;
}
/* Run mb_test for every non-empty combination of the letter pairs
   a/A, b/B, c/C and d/D, skipping any combination that includes a
   pair of which neither letter occurs in PATTERN or STRING.  Returns
   the OR of all mb_test results, i.e. 0 only if every executed
   variant passed.  */
static int
mb_tests (const char *pattern, int cflags, const char *string, int eflags,
          char *expect, const char *matches)
{
  static const char pairs[4][2]
    = { { 'a', 'A' }, { 'b', 'B' }, { 'c', 'C' }, { 'd', 'D' } };
  int ret = 0;
  int mask;
  /* LETTERS holds at most 8 letters plus NUL; FAIL holds "UTF-8 ",
     those letters, " FAIL" and NUL, which is 20 bytes at most.  */
  char letters[9], fail[20];

  /* The tests aren't supposed to work with xdigit, since a-dA-D are
     hex digits while \'a \'A \v{c}\v{C}\v{d}\v{D}\'e \'E are not.  */
  if (strstr (pattern, "[:xdigit:]"))
    return 0;

  /* XXX: regex ATM handles only single byte equivalence classes.  */
  if (strstr (pattern, "[[=b=]]"))
    return 0;

  /* Bit N of MASK selects pairs[N].  */
  for (mask = 1; mask < 16; ++mask)
    {
      char *p = letters;
      int bit, usable = 1;

      for (bit = 0; bit < 4 && usable; ++bit)
        {
          const char lower = pairs[bit][0];
          const char upper = pairs[bit][1];

          if ((mask & (1 << bit)) == 0)
            continue;

          if (strchr (pattern, lower) || strchr (string, lower)
              || strchr (pattern, upper) || strchr (string, upper))
            {
              *p++ = lower;
              *p++ = upper;
            }
          else
            /* Replacing this pair would change nothing.  */
            usable = 0;
        }

      if (!usable)
        continue;

      *p = '\0';
      sprintf (fail, "UTF-8 %s FAIL", letters);
      ret |= mb_test (pattern, cflags, string, eflags, expect, matches,
                      letters, fail);
    }

  return ret;
}
/* Test driver.  Takes exactly one non-option argument, the name of
   the test file, and optionally --utf8 to repeat every passing test
   in the cs_CZ.UTF-8 locale, both as is and with multibyte
   replacements.

   Each non-comment line of the file holds tab-separated fields:
   pattern, flags, string and, optionally, the expected match and the
   expected submatches.  Lines lacking one of the first three fields
   or using an unsupported flag are skipped.  Returns 0 if everything
   passed and 1 otherwise.  */
int
main (int argc, char **argv)
{
  static int test_utf8 = 0;
  static const struct option options[] =
    {
      {"utf8", no_argument, &test_utf8, 1},
      {NULL, 0, NULL, 0 }
    };
  int ret = 0;
  char *line = NULL;
  size_t line_len = 0;
  ssize_t len;
  FILE *f;

  mtrace ();

  /* The only option stores into test_utf8 by itself, so there is
     nothing to do per option.  */
  while (getopt_long (argc, argv, "", options, NULL) >= 0)
    continue;

  if (argc != optind + 1)
    {
      fprintf (stderr, "Missing test filename\n");
      return 1;
    }

  f = fopen (argv[optind], "r");
  if (f == NULL)
    {
      fprintf (stderr, "Couldn't open %s\n", argv[optind]);
      return 1;
    }

  while ((len = getline (&line, &line_len, f)) > 0)
    {
      char *pattern, *flagstr, *string, *expect, *matches;
      const char *flag;
      int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0;
      int unsupported = 0;

      if (line[len - 1] == '\n')
        line[len - 1] = '\0';

      /* Skip comments and empty lines.  */
      if (*line == '#' || *line == '\0')
        continue;

      puts (line);
      fflush (stdout);

      /* Field 1: the pattern; a literal "" denotes the empty pattern.  */
      pattern = strtok (line, "\t");
      if (pattern == NULL)
        continue;
      if (strcmp (pattern, "\"\"") == 0)
        pattern += 2;

      /* Field 2: the flags.  */
      flagstr = strtok (NULL, "\t");
      if (flagstr == NULL)
        continue;

      /* Field 3: the string; "" again denotes the empty string.  */
      string = strtok (NULL, "\t");
      if (string == NULL)
        continue;
      if (strcmp (string, "\"\"") == 0)
        string += 2;

      for (flag = flagstr; *flag != '\0'; ++flag)
        switch (*flag)
          {
          case 'b':
            cflags &= ~REG_EXTENDED;
            break;
          case '&':
            try_bre_ere = 1;
            break;
          case 'C':
            eflags = -1;
            break;
          case 'i':
            cflags |= REG_ICASE;
            break;
          case 's':
            cflags |= REG_NOSUB;
            break;
          case 'n':
            cflags |= REG_NEWLINE;
            break;
          case '^':
            eflags |= REG_NOTBOL;
            break;
          case '$':
            eflags |= REG_NOTEOL;
            break;
          case 'm':
          case 'p':
          case '#':
            /* Not supported.  */
            unsupported = 1;
            break;
          case '-':
          default:
            break;
          }

      if (unsupported)
        continue;

      replace_special_chars (pattern);
      glibc_re_syntax (pattern);
      /* With eflags == -1 the string is an error code name and must
         stay as written.  */
      if (eflags != -1)
        replace_special_chars (string);

      /* Fields 4 and 5 are optional.  */
      matches = NULL;
      expect = strtok (NULL, "\t");
      if (expect != NULL)
        {
          replace_special_chars (expect);
          matches = strtok (NULL, "\t");
          if (matches != NULL)
            replace_special_chars (matches);
        }

      if (setlocale (LC_ALL, "C") == NULL)
        {
          puts ("setlocale C failed");
          ret = 1;
        }

      if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
          || (try_bre_ere
              && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
                       expect, matches, "FAIL")))
        {
          ret = 1;
          continue;
        }

      if (!test_utf8)
        continue;

      /* The test passed in the C locale; repeat it in UTF-8.  */
      if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
        {
          puts ("setlocale cs_CZ.UTF-8 failed");
          ret = 1;
        }
      else if (test (pattern, cflags, string, eflags, expect, matches,
                     "UTF-8 FAIL")
               || (try_bre_ere
                   && test (pattern, cflags & ~REG_EXTENDED, string,
                            eflags, expect, matches, "UTF-8 FAIL")))
        ret = 1;
      else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
               || (try_bre_ere
                   && mb_tests (pattern, cflags & ~REG_EXTENDED, string,
                                eflags, expect, matches)))
        ret = 1;
    }

  free (line);
  fclose (f);
  return ret;
}