/* Repository viewer header (not part of the program):
   commit subject: "Add CVE-2013-4332 to NEWS."
   path: [glibc.git] / posix / bug-regex19.c
   blob 3ae4cab79555028a661da7023148761e075d0cd3  */
/* Regular expression tests.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
20 #include <sys/types.h>
21 #include <mcheck.h>
22 #include <regex.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <locale.h>
/* Shorthands for the two regex syntaxes used in the table below.  */
#define BRE RE_SYNTAX_POSIX_BASIC
#define ERE RE_SYNTAX_POSIX_EXTENDED

/* One search scenario.
   syntax  - value handed to re_set_syntax before compiling (BRE or ERE).
   pattern - regular expression source, compiled with re_compile_pattern.
   string  - subject text handed to re_search.
   start   - offset in STRING at which the search begins.
   res     - value re_search is expected to return.

   The characters A B C D ! = ~ in PATTERN and STRING act as placeholders:
   do_mb_tests re-runs every entry with subsets of them swapped for
   multibyte characters.  */
static struct test_s
{
  int syntax;
  const char *pattern;
  const char *string;
  int start, res;
} tests[] = {
  {BRE, "\\<A", "CBAA", 0, -1},
  {BRE, "\\<A", "CBAA", 2, -1},
  {BRE, "A\\>", "CAAB", 1, -1},
  {BRE, "\\bA", "CBAA", 0, -1},
  {BRE, "\\bA", "CBAA", 2, -1},
  {BRE, "A\\b", "CAAB", 1, -1},
  {BRE, "\\<A", "AA", 0, 0},
  {BRE, "\\<A", "C-AA", 2, 2},
  {BRE, "A\\>", "CAA-", 1, 2},
  {BRE, "A\\>", "CAA", 1, 2},
  {BRE, "\\bA", "AA", 0, 0},
  {BRE, "\\bA", "C-AA", 2, 2},
  {BRE, "A\\b", "CAA-", 1, 2},
  {BRE, "A\\b", "CAA", 1, 2},
  {BRE, "\\<[A]", "CBAA", 0, -1},
  {BRE, "\\<[A]", "CBAA", 2, -1},
  {BRE, "[A]\\>", "CAAB", 1, -1},
  {BRE, "\\b[A]", "CBAA", 0, -1},
  {BRE, "\\b[A]", "CBAA", 2, -1},
  {BRE, "[A]\\b", "CAAB", 1, -1},
  {BRE, "\\<[A]", "AA", 0, 0},
  {BRE, "\\<[A]", "C-AA", 2, 2},
  {BRE, "[A]\\>", "CAA-", 1, 2},
  {BRE, "[A]\\>", "CAA", 1, 2},
  {BRE, "\\b[A]", "AA", 0, 0},
  {BRE, "\\b[A]", "C-AA", 2, 2},
  {BRE, "[A]\\b", "CAA-", 1, 2},
  {BRE, "[A]\\b", "CAA", 1, 2},
  {ERE, "\\b(A|!|.B)", "A=AC", 0, 0},
  {ERE, "\\b(A|!|.B)", "=AC", 0, 1},
  {ERE, "\\b(A|!|.B)", "!AC", 0, 1},
  {ERE, "\\b(A|!|.B)", "=AB", 0, 1},
  {ERE, "\\b(A|!|.B)", "DA!C", 0, 2},
  {ERE, "\\b(A|!|.B)", "=CB", 0, 1},
  {ERE, "\\b(A|!|.B)", "!CB", 0, 1},
  {ERE, "\\b(A|!|.B)", "D,B", 0, 1},
  {ERE, "\\b(A|!|.B)", "!.C", 0, -1},
  {ERE, "\\b(A|!|.B)", "BCB", 0, -1},
  {ERE, "(A|\\b)(A|B|C)", "DAAD", 0, 1},
  {ERE, "(A|\\b)(A|B|C)", "DABD", 0, 1},
  {ERE, "(A|\\b)(A|B|C)", "AD", 0, 0},
  {ERE, "(A|\\b)(A|B|C)", "C!", 0, 0},
  {ERE, "(A|\\b)(A|B|C)", "D,B", 0, 2},
  {ERE, "(A|\\b)(A|B|C)", "DA?A", 0, 3},
  {ERE, "(A|\\b)(A|B|C)", "BBC", 0, 0},
  {ERE, "(A|\\b)(A|B|C)", "DA", 0, -1},
  {ERE, "(!|\\b)(!|=|~)", "A!=\\", 0, 1},
  {ERE, "(!|\\b)(!|=|~)", "/!=A", 0, 1},
  {ERE, "(!|\\b)(!|=|~)", "A=A", 0, 1},
  {ERE, "(!|\\b)(!|=|~)", "==!=", 0, 2},
  {ERE, "(!|\\b)(!|=|~)", "==C~", 0, 3},
  {ERE, "(!|\\b)(!|=|~)", "=~=", 0, -1},
  {ERE, "(!|\\b)(!|=|~)", "~!", 0, -1},
  {ERE, "(!|\\b)(!|=|~)", "~=~", 0, -1},
  {ERE, "(\\b|A.)[ABC]", "AC", 0, 0},
  {ERE, "(\\b|A.)[ABC]", "=A", 0, 1},
  {ERE, "(\\b|A.)[ABC]", "DACC", 0, 1},
  {ERE, "(\\b|A.)[A~C]", "AC", 0, 0},
  {ERE, "(\\b|A.)[A~C]", "=A", 0, 1},
  {ERE, "(\\b|A.)[A~C]", "DACC", 0, 1},
  {ERE, "(\\b|A.)[A~C]", "B!A=", 0, 2},
  {ERE, "(\\b|A.)[A~C]", "B~C", 0, 1},
  {ERE, ".\\b.", "AA~", 0, 1},
  {ERE, ".\\b.", "=A=", 0, 0},
  {ERE, ".\\b.", "==", 0, -1},
  {ERE, ".\\b.", "ABA", 0, -1},
  {ERE, "[^k]\\b[^k]", "AA~", 0, 1},
  {ERE, "[^k]\\b[^k]", "=A=", 0, 0},
  {ERE, "[^k]\\b[^k]", "Ak~kA~", 0, 4},
  {ERE, "[^k]\\b[^k]", "==", 0, -1},
  {ERE, "[^k]\\b[^k]", "ABA", 0, -1},
  {ERE, "[^k]\\b[^k]", "Ak~", 0, -1},
  {ERE, "[^k]\\b[^k]", "k=k", 0, -1},
  {ERE, "[^C]\\b[^C]", "AA~", 0, 1},
  {ERE, "[^C]\\b[^C]", "=A=", 0, 0},
  {ERE, "[^C]\\b[^C]", "AC~CA~", 0, 4},
  {ERE, "[^C]\\b[^C]", "==", 0, -1},
  {ERE, "[^C]\\b[^C]", "ABA", 0, -1},
  {ERE, "[^C]\\b[^C]", "AC~", 0, -1},
  {ERE, "[^C]\\b[^C]", "C=C", 0, -1},
  {ERE, "\\<(A|!|.B)", "A=AC", 0, 0},
  {ERE, "\\<(A|!|.B)", "=AC", 0, 1},
  {ERE, "\\<(A|!|.B)", "!AC", 0, 1},
  {ERE, "\\<(A|!|.B)", "=AB", 0, 1},
  {ERE, "\\<(A|!|.B)", "=CB", 0, 1},
  {ERE, "\\<(A|!|.B)", "!CB", 0, 1},
  {ERE, "\\<(A|!|.B)", "DA!C", 0, -1},
  {ERE, "\\<(A|!|.B)", "D,B", 0, -1},
  {ERE, "\\<(A|!|.B)", "!.C", 0, -1},
  {ERE, "\\<(A|!|.B)", "BCB", 0, -1},
  {ERE, "(A|\\<)(A|B|C)", "DAAD", 0, 1},
  {ERE, "(A|\\<)(A|B|C)", "DABD", 0, 1},
  {ERE, "(A|\\<)(A|B|C)", "AD", 0, 0},
  {ERE, "(A|\\<)(A|B|C)", "C!", 0, 0},
  {ERE, "(A|\\<)(A|B|C)", "D,B", 0, 2},
  {ERE, "(A|\\<)(A|B|C)", "DA?A", 0, 3},
  {ERE, "(A|\\<)(A|B|C)", "BBC", 0, 0},
  {ERE, "(A|\\<)(A|B|C)", "DA", 0, -1},
  {ERE, "(!|\\<)(!|=|~)", "A!=\\", 0, 1},
  {ERE, "(!|\\<)(!|=|~)", "/!=A", 0, 1},
  {ERE, "(!|\\<)(!|=|~)", "==!=", 0, 2},
  {ERE, "(!|\\<)(!|=|~)", "==C~", 0, -1},
  {ERE, "(!|\\<)(!|=|~)", "A=A", 0, -1},
  {ERE, "(!|\\<)(!|=|~)", "=~=", 0, -1},
  {ERE, "(!|\\<)(!|=|~)", "~!", 0, -1},
  {ERE, "(!|\\<)(!|=|~)", "~=~", 0, -1},
  {ERE, "(\\<|A.)[ABC]", "AC", 0, 0},
  {ERE, "(\\<|A.)[ABC]", "=A", 0, 1},
  {ERE, "(\\<|A.)[ABC]", "DACC", 0, 1},
  {ERE, "(\\<|A.)[A~C]", "AC", 0, 0},
  {ERE, "(\\<|A.)[A~C]", "=A", 0, 1},
  {ERE, "(\\<|A.)[A~C]", "DACC", 0, 1},
  {ERE, "(\\<|A.)[A~C]", "B!A=", 0, 2},
  {ERE, "(\\<|A.)[A~C]", "B~C", 0, 2},
  {ERE, ".\\<.", "=A=", 0, 0},
  {ERE, ".\\<.", "AA~", 0, -1},
  {ERE, ".\\<.", "==", 0, -1},
  {ERE, ".\\<.", "ABA", 0, -1},
  {ERE, "[^k]\\<[^k]", "=k=A=", 0, 2},
  {ERE, "[^k]\\<[^k]", "kk~", 0, -1},
  {ERE, "[^k]\\<[^k]", "==", 0, -1},
  {ERE, "[^k]\\<[^k]", "ABA", 0, -1},
  {ERE, "[^k]\\<[^k]", "=k=", 0, -1},
  {ERE, "[^C]\\<[^C]", "=C=A=", 0, 2},
  {ERE, "[^C]\\<[^C]", "CC~", 0, -1},
  {ERE, "[^C]\\<[^C]", "==", 0, -1},
  {ERE, "[^C]\\<[^C]", "ABA", 0, -1},
  {ERE, "[^C]\\<[^C]", "=C=", 0, -1},
  {ERE, ".\\B.", "ABA", 0, 0},
  {ERE, ".\\B.", "=BDC", 0, 1},
  {ERE, "[^k]\\B[^k]", "kkkABA", 0, 3},
  {ERE, "[^k]\\B[^k]", "kBk", 0, -1},
  {ERE, "[^C]\\B[^C]", "CCCABA", 0, 3},
  {ERE, "[^C]\\B[^C]", "CBC", 0, -1},
  {ERE, ".(\\b|\\B).", "=~AB", 0, 0},
  {ERE, ".(\\b|\\B).", "A=C", 0, 0},
  {ERE, ".(\\b|\\B).", "ABC", 0, 0},
  {ERE, ".(\\b|\\B).", "=~\\!", 0, 0},
  {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0},
  {ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
  {ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
  {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0},
  {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0},
  {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0},
  {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0},
  {ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
  {ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
  {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0},
  {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0},
  {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0},
  {ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
  {ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
  {ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
  {ERE, "\\b([A]|[!]|.B)", "=AB", 0, 1},
  {ERE, "\\b([A]|[!]|.B)", "DA!C", 0, 2},
  {ERE, "\\b([A]|[!]|.B)", "=CB", 0, 1},
  {ERE, "\\b([A]|[!]|.B)", "!CB", 0, 1},
  {ERE, "\\b([A]|[!]|.B)", "D,B", 0, 1},
  {ERE, "\\b([A]|[!]|.B)", "!.C", 0, -1},
  {ERE, "\\b([A]|[!]|.B)", "BCB", 0, -1},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "DAAD", 0, 1},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "DABD", 0, 1},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "AD", 0, 0},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "C!", 0, 0},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "D,B", 0, 2},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "DA?A", 0, 3},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "BBC", 0, 0},
  {ERE, "([A]|\\b)([A]|[B]|[C])", "DA", 0, -1},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "A!=\\", 0, 1},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "/!=A", 0, 1},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "A=A", 0, 1},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "==!=", 0, 2},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "==C~", 0, 3},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "=~=", 0, -1},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "~!", 0, -1},
  {ERE, "([!]|\\b)([!]|[=]|[~])", "~=~", 0, -1},
  {ERE, "\\<([A]|[!]|.B)", "A=AC", 0, 0},
  {ERE, "\\<([A]|[!]|.B)", "=AC", 0, 1},
  {ERE, "\\<([A]|[!]|.B)", "!AC", 0, 1},
  {ERE, "\\<([A]|[!]|.B)", "=AB", 0, 1},
  {ERE, "\\<([A]|[!]|.B)", "=CB", 0, 1},
  {ERE, "\\<([A]|[!]|.B)", "!CB", 0, 1},
  {ERE, "\\<([A]|[!]|.B)", "DA!C", 0, -1},
  {ERE, "\\<([A]|[!]|.B)", "D,B", 0, -1},
  {ERE, "\\<([A]|[!]|.B)", "!.C", 0, -1},
  {ERE, "\\<([A]|[!]|.B)", "BCB", 0, -1},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "DAAD", 0, 1},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "DABD", 0, 1},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "AD", 0, 0},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "C!", 0, 0},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "D,B", 0, 2},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "DA?A", 0, 3},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "BBC", 0, 0},
  {ERE, "([A]|\\<)([A]|[B]|[C])", "DA", 0, -1},
  {ERE, "([!]|\\<)([!=]|[~])", "A!=\\", 0, 1},
  {ERE, "([!]|\\<)([!=]|[~])", "/!=A", 0, 1},
  {ERE, "([!]|\\<)([!=]|[~])", "==!=", 0, 2},
  {ERE, "([!]|\\<)([!=]|[~])", "==C~", 0, -1},
  {ERE, "([!]|\\<)([!=]|[~])", "A=A", 0, -1},
  {ERE, "([!]|\\<)([!=]|[~])", "=~=", 0, -1},
  {ERE, "([!]|\\<)([!=]|[~])", "~!", 0, -1},
  {ERE, "([!]|\\<)([!=]|[~])", "~=~", 0, -1},
  {ERE, "(\\<|[A].)[ABC]", "AC", 0, 0},
  {ERE, "(\\<|[A].)[ABC]", "=A", 0, 1},
  {ERE, "(\\<|[A].)[ABC]", "DACC", 0, 1},
  {ERE, "(\\<|[A].)[A~C]", "AC", 0, 0},
  {ERE, "(\\<|[A].)[A~C]", "=A", 0, 1},
  {ERE, "(\\<|[A].)[A~C]", "DACC", 0, 1},
  {ERE, "(\\<|[A].)[A~C]", "B!A=", 0, 2},
  {ERE, "(\\<|[A].)[A~C]", "B~C", 0, 2},
  {ERE, "^[^A]*\\bB", "==B", 0, 0},
  {ERE, "^[^A]*\\bB", "CBD!=B", 0, 0},
  {ERE, "[^A]*\\bB", "==B", 2, 2}
};
254 do_one_test (const struct test_s *test, const char *fail)
256 int res;
257 const char *err;
258 struct re_pattern_buffer regbuf;
260 re_set_syntax (test->syntax);
261 memset (&regbuf, '\0', sizeof (regbuf));
262 err = re_compile_pattern (test->pattern, strlen (test->pattern),
263 &regbuf);
264 if (err != NULL)
266 printf ("%sre_compile_pattern \"%s\" failed: %s\n", fail, test->pattern,
267 err);
268 return 1;
271 res = re_search (&regbuf, test->string, strlen (test->string),
272 test->start, strlen (test->string) - test->start, NULL);
273 if (res != test->res)
275 printf ("%sre_search \"%s\" \"%s\" failed: %d (expected %d)\n",
276 fail, test->pattern, test->string, res, test->res);
277 regfree (&regbuf);
278 return 1;
281 if (test->res > 0 && test->start == 0)
283 res = re_search (&regbuf, test->string, strlen (test->string),
284 test->res, strlen (test->string) - test->res, NULL);
285 if (res != test->res)
287 printf ("%sre_search from expected \"%s\" \"%s\" failed: %d (expected %d)\n",
288 fail, test->pattern, test->string, res, test->res);
289 regfree (&regbuf);
290 return 1;
294 regfree (&regbuf);
295 return 0;
/* Write at P the UTF-8 byte sequence that stands in for the ASCII
   placeholder C and return a pointer just past the last byte written.

   At most four bytes are stored.  No terminating NUL is written.
   A character without a stand-in stores nothing and P is returned
   unchanged.  */
static char *
replace (char *p, char c)
{
  switch (c)
    {
    /* A -> A" (U+00C4) */
    case 'A': *p++ = '\xc3'; *p++ = '\x84'; break;
    /* B -> O" (U+00D6) */
    case 'B': *p++ = '\xc3'; *p++ = '\x96'; break;
    /* C -> U" (U+00DC) */
    case 'C': *p++ = '\xc3'; *p++ = '\x9c'; break;
    /* D -> a" (U+00E4) */
    case 'D': *p++ = '\xc3'; *p++ = '\xa4'; break;
    /* ! -> MULTIPLICATION SIGN (U+00D7) */
    case '!': *p++ = '\xc3'; *p++ = '\x97'; break;
    /* = -> EM DASH (U+2014) */
    case '=': *p++ = '\xe2'; *p++ = '\x80'; *p++ = '\x94'; break;
    /* ~ -> MUSICAL SYMBOL HALF NOTE (U+1D15E) */
    case '~': *p++ = '\xf0'; *p++ = '\x9d'; *p++ = '\x85'; *p++ = '\x9e';
      break;
    /* Anything else has no multibyte stand-in.  */
    default:
      break;
    }
  return p;
}
323 do_mb_tests (const struct test_s *test)
325 int i, j;
326 struct test_s t;
327 const char *const chars = "ABCD!=~";
328 char repl[8], *p;
329 char pattern[strlen (test->pattern) * 4 + 1];
330 char string[strlen (test->string) * 4 + 1];
331 char fail[8 + sizeof ("UTF-8 ")];
333 t = *test;
334 t.pattern = pattern;
335 t.string = string;
336 strcpy (fail, "UTF-8 ");
337 for (i = 1; i < 128; ++i)
339 p = repl;
340 for (j = 0; j < 7; ++j)
341 if (i & (1 << j))
343 if (!strchr (test->pattern, chars[j])
344 && !strchr (test->string, chars[j]))
345 break;
346 *p++ = chars[j];
348 if (j < 7)
349 continue;
350 *p = '\0';
352 for (j = 0, p = pattern; test->pattern[j]; ++j)
353 if (strchr (repl, test->pattern[j]))
354 p = replace (p, test->pattern[j]);
355 else if (test->pattern[j] == '\\' && test->pattern[j + 1])
357 *p++ = test->pattern[j++];
358 *p++ = test->pattern[j];
360 else
361 *p++ = test->pattern[j];
362 *p = '\0';
364 t.start = test->start;
365 t.res = test->res;
367 for (j = 0, p = string; test->string[j]; ++j)
368 if (strchr (repl, test->string[j]))
370 char *d = replace (p, test->string[j]);
371 if (test->start > j)
372 t.start += d - p - 1;
373 if (test->res > j)
374 t.res += d - p - 1;
375 p = d;
377 else
378 *p++ = test->string[j];
379 *p = '\0';
381 p = stpcpy (fail + strlen ("UTF-8 "), repl);
382 *p++ = ' ';
383 *p = '\0';
385 if (do_one_test (&t, fail))
386 return 1;
388 return 0;
392 main (void)
394 size_t i;
395 int ret = 0;
397 mtrace ();
399 for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
401 if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
403 puts ("setlocale de_DE.ISO-8859-1 failed");
404 ret = 1;
406 ret |= do_one_test (&tests[i], "");
407 if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
409 puts ("setlocale de_DE.UTF-8 failed");
410 ret = 1;
412 ret |= do_one_test (&tests[i], "UTF-8 ");
413 ret |= do_mb_tests (&tests[i]);
416 return ret;