Update copyright dates with scripts/update-copyrights
[glibc.git] / posix / tst-rxspencer.c
blobf740c1f54a723d22c620f8f3f0a5875b4e99ef7d
/* Regular expression tests.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19 #include <sys/types.h>
20 #include <mcheck.h>
21 #include <regex.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <locale.h>
26 #include <getopt.h>
/* Expand the placeholder letters used by the test file, in place, in STR:
   'N' becomes a newline, 'T' a tab, 'S' a space and 'Z' a NUL byte.

   The scan resumes one byte after every replacement, so placeholders
   that follow a NUL produced from 'Z' (and that still lie before the
   string's original terminator) are expanded as well.  */
static void
replace_special_chars (char *str)
{
  for (; (str = strpbrk (str, "NTSZ")) != NULL; ++str)
    switch (*str)
      {
      case 'N':
        *str = '\n';
        break;
      case 'T':
        *str = '\t';
        break;
      case 'S':
        *str = ' ';
        break;
      case 'Z':
        *str = '\0';
        break;
      default:
        /* Not reachable: strpbrk only stops on one of "NTSZ".  */
        break;
      }
}
/* Rewrite, in place, the word-boundary spellings "[[:<:]]" and "[[:>:]]"
   found in STR into the two-character forms "\<" and "\>".

   Each rewrite shortens the string by five bytes; the tail, including the
   terminating NUL, is moved down with memmove.  Any other text starting
   with "[[:" is left untouched.  Only the part of STR up to its first NUL
   byte is examined.  */
static void
glibc_re_syntax (char *str)
{
  /* END points one past the terminating NUL so that the NUL itself is
     carried along by the memmove below.  */
  char *p, *end = strchr (str, '\0') + 1;

  /* Replace [[:<:]] with \< and [[:>:]] with \>.  */
  for (p = str; (p = strstr (p, "[[:")) != NULL; )
    {
      if ((p[3] == '<' || p[3] == '>') && strncmp (p + 4, ":]]", 3) == 0)
        {
          p[0] = '\\';
          p[1] = p[3];
          memmove (p + 2, p + 7, end - p - 7);
          end -= 5;
          p += 2;
        }
      else
        /* Some other bracket expression: step over the "[[:" prefix.  */
        p += 3;
    }
}
/* Store at DST the two-byte UTF-8 sequence that stands in for the ASCII
   letter C and return the position just past the bytes written.

   Only the letters a-d and A-D have a replacement; for any other value of
   C nothing is written and DST is returned unchanged.  The caller must
   provide room for two bytes at DST.  */
static char *
mb_replace (char *dst, const char c)
{
  switch (c)
    {
    /* Replace a with \'a and A with \'A (U+00E1, U+00C1).  */
    case 'a':
      *dst++ = '\xc3';
      *dst++ = '\xa1';
      break;
    case 'A':
      *dst++ = '\xc3';
      *dst++ = '\x81';
      break;
    /* Replace b with \v{c} and B with \v{C} (U+010D, U+010C).  */
    case 'b':
      *dst++ = '\xc4';
      *dst++ = '\x8d';
      break;
    case 'B':
      *dst++ = '\xc4';
      *dst++ = '\x8c';
      break;
    /* Replace c with \v{d} and C with \v{D} (U+010F, U+010E).  */
    case 'c':
      *dst++ = '\xc4';
      *dst++ = '\x8f';
      break;
    case 'C':
      *dst++ = '\xc4';
      *dst++ = '\x8e';
      break;
    /* Replace d with \'e and D with \'E (U+00E9, U+00C9).  */
    case 'd':
      *dst++ = '\xc3';
      *dst++ = '\xa9';
      break;
    case 'D':
      *dst++ = '\xc3';
      *dst++ = '\x89';
      break;
    default:
      /* No replacement defined: emit nothing.  */
      break;
    }

  return dst;
}
/* Return a newly allocated copy of STR in which every character that
   occurs in LETTERS has been passed through mb_replace; all other
   characters are copied verbatim.

   Returns NULL when STR is NULL or when the allocation fails.  The
   result is obtained from malloc and must be released by the caller.  */
static char *
mb_frob_string (const char *str, const char *letters)
{
  char *ret, *dst;
  const char *src;

  if (str == NULL)
    return NULL;

  /* Each input byte yields at most two output bytes.  */
  ret = malloc (2 * strlen (str) + 1);
  if (ret == NULL)
    return NULL;

  dst = ret;
  for (src = str; *src != '\0'; ++src)
    {
      if (strchr (letters, *src) != NULL)
        dst = mb_replace (dst, *src);
      else
        *dst++ = *src;
    }
  *dst = '\0';

  return ret;
}
/* Like mb_frob_string, but don't replace anything between
   [: and :], [. and .] or [= and =] or characters escaped
   with a backslash.

   Returns NULL when STR is NULL or when the allocation fails.  The
   result is obtained from malloc and must be released by the caller.  */
static char *
mb_frob_pattern (const char *str, const char *letters)
{
  char *ret, *dst;
  const char *src;
  int in_class = 0, escaped = 0;

  if (str == NULL)
    return NULL;

  /* Each input byte yields at most two output bytes.  */
  ret = malloc (2 * strlen (str) + 1);
  if (ret == NULL)
    return NULL;

  dst = ret;
  for (src = str; *src != '\0'; ++src)
    {
      const char ch = *src;

      if (ch == '\\')
        {
          /* A backslash toggles the escape state, so "\\" ends up
             unescaped again.  */
          escaped = !escaped;
          *dst++ = ch;
          continue;
        }

      if (escaped)
        {
          /* The character after a backslash is copied untouched.  */
          escaped = 0;
          *dst++ = ch;
          continue;
        }

      if (!in_class && strchr (letters, ch) != NULL)
        {
          dst = mb_replace (dst, ch);
          continue;
        }

      /* Track "[:", "[." and "[=" openers and their ":]", ".]" and "=]"
         closers.  SRC[-1] is only inspected while IN_CLASS is set, i.e.
         after at least one earlier character has been consumed.  */
      if (!in_class)
        {
          if (ch == '[' && strchr (":.=", src[1]) != NULL)
            in_class = 1;
        }
      else if (ch == ']' && strchr (":.=", src[-1]) != NULL)
        in_class = 0;

      *dst++ = ch;
    }
  *dst = '\0';

  return ret;
}
/* Compare the match recorded in RM[IDX] against the expectation MATCH.

   MATCH is one of:
     "-"      the (sub)expression must not have matched, i.e. both
              offsets are -1;
     "@TEXT"  the match must be empty and STRING must continue with TEXT
              at that position; a bare "@" requires the empty match to
              sit at the end of STRING;
     TEXT     the matched substring of STRING must equal TEXT exactly.

   Returns 0 when the expectation holds.  Otherwise prints a diagnostic
   prefixed with FAIL on stdout and returns 1.  */
static int
check_match (regmatch_t *rm, int idx, const char *string,
             const char *match, const char *fail)
{
  const regoff_t so = rm[idx].rm_so;
  const regoff_t eo = rm[idx].rm_eo;

  if (match[0] == '-' && match[1] == '\0')
    {
      if (so == -1 && eo == -1)
        return 0;
      printf ("%s rm[%d] unexpectedly matched\n", fail, idx);
      return 1;
    }

  if (so == -1 || eo == -1)
    {
      printf ("%s rm[%d] unexpectedly did not match\n", fail, idx);
      return 1;
    }

  if (match[0] == '@')
    {
      const char *follow = match + 1;
      size_t follow_len = strlen (follow);

      if (so != eo)
        {
          printf ("%s rm[%d] not empty\n", fail, idx);
          return 1;
        }

      /* With an empty FOLLOW still compare one byte, so that the NUL of
         FOLLOW has to line up with the end of STRING.  */
      if (strncmp (string + so, follow, follow_len != 0 ? follow_len : 1))
        {
          printf ("%s rm[%d] not matching %s\n", fail, idx, match);
          return 1;
        }
      return 0;
    }

  /* Both offsets are known not to be -1 here; the length is compared
     as an unsigned quantity against strlen.  */
  if ((size_t) (eo - so) != strlen (match)
      || strncmp (string + so, match, eo - so))
    {
      printf ("%s rm[%d] not matching %s\n", fail, idx, match);
      return 1;
    }

  return 0;
}
/* Run one test case: compile PATTERN with CFLAGS, execute it on STRING
   with EFLAGS and compare the outcome with the expectations.

   EFLAGS == -1 marks a compile-error test: regcomp is expected to fail
   and STRING holds the name of the expected error code without its
   "REG_" prefix.

   Otherwise EXPECT describes the overall match (NULL meaning that
   regexec must fail) and MATCHES, if not NULL, is a comma-separated list
   of expectations for subexpressions 1, 2, ...; entries missing from the
   list are treated as "-".  Both use the notation understood by
   check_match.  MATCHES is modified temporarily while it is being split
   and is restored before returning.

   Returns 0 on success.  On failure a diagnostic prefixed with FAIL is
   printed on stdout and 1 is returned.  */
static int
test (const char *pattern, int cflags, const char *string, int eflags,
      char *expect, char *matches, const char *fail)
{
  enum { NMATCH = 10 };
  regex_t re;
  regmatch_t rm[NMATCH];
  int n, ret = 0;

  n = regcomp (&re, pattern, cflags);
  if (n != 0)
    {
      char buf[500];

      if (eflags == -1)
        {
          /* Map regcomp's return value back to the symbolic name used in
             the test file.  */
#define C(x) { REG_##x, #x }
          static const struct { reg_errcode_t code; const char *name; }
            codes[]
            = { C(NOERROR), C(NOMATCH), C(BADPAT), C(ECOLLATE),
                C(ECTYPE), C(EESCAPE), C(ESUBREG), C(EBRACK),
                C(EPAREN), C(EBRACE), C(BADBR), C(ERANGE),
                C(ESPACE), C(BADRPT) };
#undef C

          for (size_t i = 0; i < sizeof (codes) / sizeof (codes[0]); ++i)
            if (n == codes[i].code)
              {
                if (strcmp (string, codes[i].name))
                  {
                    printf ("%s regcomp returned REG_%s (expected REG_%s)\n",
                            fail, codes[i].name, string);
                    return 1;
                  }
                return 0;
              }

          printf ("%s regcomp return value REG_%d\n", fail, n);
          return 1;
        }

      regerror (n, &re, buf, sizeof (buf));
      printf ("%s regcomp failed: %s\n", fail, buf);
      return 1;
    }

  if (eflags == -1)
    {
      regfree (&re);

      /* The test case file assumes something only guaranteed by the
         rxspencer regex implementation.  Namely that for empty
         expressions regcomp() return REG_EMPTY.  This is not the case
         for us and so we ignore this error.  */
      if (strcmp (string, "EMPTY") == 0)
        return 0;

      printf ("%s regcomp unexpectedly succeeded\n", fail);
      return 1;
    }

  if (regexec (&re, string, NMATCH, rm, eflags))
    {
      regfree (&re);
      if (expect == NULL)
        return 0;
      printf ("%s regexec failed\n", fail);
      return 1;
    }

  regfree (&re);

  if (expect == NULL)
    {
      printf ("%s regexec unexpectedly succeeded\n", fail);
      return 1;
    }

  /* With REG_NOSUB the contents of RM are not checked.  */
  if (cflags & REG_NOSUB)
    return 0;

  ret = check_match (rm, 0, string, expect, fail);
  if (matches == NULL)
    return ret;

  for (n = 1; ret == 0 && n < NMATCH; ++n)
    {
      char *p = NULL;

      if (matches != NULL)
        {
          /* Cut the current entry off at the next comma for the duration
             of the check.  */
          p = strchr (matches, ',');
          if (p != NULL)
            *p = '\0';
        }

      ret = check_match (rm, n, string, matches != NULL ? matches : "-",
                         fail);

      if (p != NULL)
        {
          *p = ',';
          matches = p + 1;
        }
      else
        matches = NULL;
    }

  return ret;
}
/* Run test () on multibyte variants of the arguments: PATTERN is
   converted with mb_frob_pattern and STRING, EXPECT and MATCHES with
   mb_frob_string, each replacing the characters listed in LETTERS.
   STRING is passed through unchanged when EFLAGS is -1, where it holds
   an error-code name instead of text to match.

   Returns the result of test (), or 1 after printing a diagnostic
   prefixed with FAIL when one of the conversions failed.  */
static int
mb_test (const char *pattern, int cflags, const char *string, int eflags,
         char *expect, const char *matches, const char *letters,
         const char *fail)
{
  char *pattern_mb = mb_frob_pattern (pattern, letters);
  const char *string_mb
    = eflags == -1 ? string : mb_frob_string (string, letters);
  char *expect_mb = mb_frob_string (expect, letters);
  char *matches_mb = mb_frob_string (matches, letters);
  int ret = 0;

  /* EXPECT and MATCHES may legitimately be NULL, in which case the
     converted value is NULL too and is not an error.  */
  if (!pattern_mb || !string_mb
      || (expect && !expect_mb) || (matches && !matches_mb))
    {
      /* Terminate the line so that the diagnostic does not run into
         the output that follows.  */
      printf ("%s %m\n", fail);
      ret = 1;
    }
  else
    ret = test (pattern_mb, cflags, string_mb, eflags, expect_mb,
                matches_mb, fail);

  free (matches_mb);
  free (expect_mb);
  /* STRING_MB aliases the caller's STRING for compile-error tests.  */
  if (string_mb != string)
    free ((char *) string_mb);
  free (pattern_mb);
  return ret;
}
/* Run mb_test for every non-empty combination of the letter pairs
   a/A, b/B, c/C and d/D.  A combination is skipped as soon as one of its
   selected pairs occurs neither in PATTERN nor in STRING.

   Patterns containing "[:xdigit:]" or "[[=b=]]" are not tested at all
   and yield 0.  Returns non-zero if any mb_test run failed.  */
static int
mb_tests (const char *pattern, int cflags, const char *string, int eflags,
          char *expect, const char *matches)
{
  static const char pairs[4][2]
    = { { 'a', 'A' }, { 'b', 'B' }, { 'c', 'C' }, { 'd', 'D' } };
  int ret = 0;
  /* LETTERS holds up to four pairs plus the NUL; FAIL holds
     "UTF-8 " + LETTERS + " FAIL" plus the NUL.  */
  char letters[9], fail[20];

  /* The tests aren't supposed to work with xdigit, since a-dA-D are
     hex digits while \'a \'A \v{c}\v{C}\v{d}\v{D}\'e \'E are not.  */
  if (strstr (pattern, "[:xdigit:]"))
    return 0;

  /* XXX: regex ATM handles only single byte equivalence classes.  */
  if (strstr (pattern, "[[=b=]]"))
    return 0;

  /* Bit N of MASK selects pairs[N].  */
  for (int mask = 1; mask < 16; ++mask)
    {
      char *p = letters;
      int usable = 1;

      for (int bit = 0; bit < 4; ++bit)
        {
          const char lower = pairs[bit][0];
          const char upper = pairs[bit][1];

          if ((mask & (1 << bit)) == 0)
            continue;

          if (!strchr (pattern, lower) && !strchr (string, lower)
              && !strchr (pattern, upper) && !strchr (string, upper))
            {
              usable = 0;
              break;
            }

          *p++ = lower;
          *p++ = upper;
        }

      if (!usable)
        continue;

      *p = '\0';
      snprintf (fail, sizeof (fail), "UTF-8 %s FAIL", letters);
      ret |= mb_test (pattern, cflags, string, eflags, expect, matches,
                      letters, fail);
    }

  return ret;
}
/* Test driver.  Usage: PROGRAM [--utf8] TESTFILE.

   Every line of TESTFILE that is neither empty nor a '#' comment is
   echoed to stdout and split at tabs into
     PATTERN  FLAGS  STRING  [EXPECT  [MATCHES]]
   Lines with fewer than three fields, or whose FLAGS contain 'm', 'p'
   or '#', are skipped.  Each remaining test is run in the C locale and,
   if that passed and --utf8 was given, again in the cs_CZ.UTF-8 locale
   together with the multibyte variants produced by mb_tests.

   Returns 0 if all tests passed and 1 otherwise.  */
int
main (int argc, char **argv)
{
  int ret = 0;
  char *line = NULL;
  size_t line_len = 0;
  ssize_t len;
  FILE *f;
  static int test_utf8 = 0;
  static const struct option options[] =
    {
      {"utf8", no_argument, &test_utf8, 1},
      {NULL, 0, NULL, 0 }
    };

  mtrace ();

  /* The only option sets TEST_UTF8 through its flag pointer, so there is
     nothing to do per option.  */
  while (getopt_long (argc, argv, "", options, NULL) >= 0)
    continue;

  if (optind + 1 != argc)
    {
      fprintf (stderr, "Missing test filename\n");
      return 1;
    }

  f = fopen (argv[optind], "r");
  if (f == NULL)
    {
      fprintf (stderr, "Couldn't open %s\n", argv[optind]);
      return 1;
    }

  while ((len = getline (&line, &line_len, f)) > 0)
    {
      char *pattern, *flagstr, *string, *expect, *matches, *p;
      int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0;

      if (line[len - 1] == '\n')
        line[len - 1] = '\0';

      /* Skip comments and empty lines.  */
      if (*line == '#' || *line == '\0')
        continue;

      puts (line);
      fflush (stdout);

      pattern = strtok (line, "\t");
      if (pattern == NULL)
        continue;

      /* A field consisting of two double quotes denotes the empty
         string.  */
      if (strcmp (pattern, "\"\"") == 0)
        pattern += 2;

      flagstr = strtok (NULL, "\t");
      if (flagstr == NULL)
        continue;

      string = strtok (NULL, "\t");
      if (string == NULL)
        continue;

      if (strcmp (string, "\"\"") == 0)
        string += 2;

      for (p = flagstr; *p; ++p)
        switch (*p)
          {
          case '-':
            break;
          case 'b':
            cflags &= ~REG_EXTENDED;
            break;
          case '&':
            try_bre_ere = 1;
            break;
          case 'C':
            /* Compile-error test; see test ().  */
            eflags = -1;
            break;
          case 'i':
            cflags |= REG_ICASE;
            break;
          case 's':
            cflags |= REG_NOSUB;
            break;
          case 'n':
            cflags |= REG_NEWLINE;
            break;
          case '^':
            eflags |= REG_NOTBOL;
            break;
          case '$':
            eflags |= REG_NOTEOL;
            break;
          case 'm':
          case 'p':
          case '#':
            /* Not supported.  */
            flagstr = NULL;
            break;
          default:
            /* Any other flag character is ignored.  */
            break;
          }

      if (flagstr == NULL)
        continue;

      replace_special_chars (pattern);
      glibc_re_syntax (pattern);
      /* For compile-error tests STRING is an error-code name and must
         stay as written.  */
      if (eflags != -1)
        replace_special_chars (string);

      expect = strtok (NULL, "\t");
      matches = NULL;
      if (expect != NULL)
        {
          replace_special_chars (expect);
          matches = strtok (NULL, "\t");
          if (matches != NULL)
            replace_special_chars (matches);
        }

      if (setlocale (LC_ALL, "C") == NULL)
        {
          puts ("setlocale C failed");
          ret = 1;
        }
      if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
          || (try_bre_ere
              && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
                       expect, matches, "FAIL")))
        ret = 1;
      else if (test_utf8)
        {
          if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
            {
              puts ("setlocale cs_CZ.UTF-8 failed");
              ret = 1;
            }
          else if (test (pattern, cflags, string, eflags, expect, matches,
                         "UTF-8 FAIL")
                   || (try_bre_ere
                       && test (pattern, cflags & ~REG_EXTENDED, string,
                                eflags, expect, matches, "UTF-8 FAIL")))
            ret = 1;
          else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
                   || (try_bre_ere
                       && mb_tests (pattern, cflags & ~REG_EXTENDED, string,
                                    eflags, expect, matches)))
            ret = 1;
        }
    }

  free (line);
  fclose (f);
  return ret;
}