/* Regular expression tests.
   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
21 #include <sys/types.h>
29 #define BRE RE_SYNTAX_POSIX_BASIC
30 #define ERE RE_SYNTAX_POSIX_EXTENDED
39 {BRE
, "\\<A", "CBAA", 0, -1},
40 {BRE
, "\\<A", "CBAA", 2, -1},
41 {BRE
, "A\\>", "CAAB", 1, -1},
42 {BRE
, "\\bA", "CBAA", 0, -1},
43 {BRE
, "\\bA", "CBAA", 2, -1},
44 {BRE
, "A\\b", "CAAB", 1, -1},
45 {BRE
, "\\<A", "AA", 0, 0},
46 {BRE
, "\\<A", "C-AA", 2, 2},
47 {BRE
, "A\\>", "CAA-", 1, 2},
48 {BRE
, "A\\>", "CAA", 1, 2},
49 {BRE
, "\\bA", "AA", 0, 0},
50 {BRE
, "\\bA", "C-AA", 2, 2},
51 {BRE
, "A\\b", "CAA-", 1, 2},
52 {BRE
, "A\\b", "CAA", 1, 2},
53 {BRE
, "\\<[A]", "CBAA", 0, -1},
54 {BRE
, "\\<[A]", "CBAA", 2, -1},
55 {BRE
, "[A]\\>", "CAAB", 1, -1},
56 {BRE
, "\\b[A]", "CBAA", 0, -1},
57 {BRE
, "\\b[A]", "CBAA", 2, -1},
58 {BRE
, "[A]\\b", "CAAB", 1, -1},
59 {BRE
, "\\<[A]", "AA", 0, 0},
60 {BRE
, "\\<[A]", "C-AA", 2, 2},
61 {BRE
, "[A]\\>", "CAA-", 1, 2},
62 {BRE
, "[A]\\>", "CAA", 1, 2},
63 {BRE
, "\\b[A]", "AA", 0, 0},
64 {BRE
, "\\b[A]", "C-AA", 2, 2},
65 {BRE
, "[A]\\b", "CAA-", 1, 2},
66 {BRE
, "[A]\\b", "CAA", 1, 2},
67 {ERE
, "\\b(A|!|.B)", "A=AC", 0, 0},
68 {ERE
, "\\b(A|!|.B)", "=AC", 0, 1},
69 {ERE
, "\\b(A|!|.B)", "!AC", 0, 1},
70 {ERE
, "\\b(A|!|.B)", "=AB", 0, 1},
71 {ERE
, "\\b(A|!|.B)", "DA!C", 0, 2},
72 {ERE
, "\\b(A|!|.B)", "=CB", 0, 1},
73 {ERE
, "\\b(A|!|.B)", "!CB", 0, 1},
74 {ERE
, "\\b(A|!|.B)", "D,B", 0, 1},
75 {ERE
, "\\b(A|!|.B)", "!.C", 0, -1},
76 {ERE
, "\\b(A|!|.B)", "BCB", 0, -1},
77 {ERE
, "(A|\\b)(A|B|C)", "DAAD", 0, 1},
78 {ERE
, "(A|\\b)(A|B|C)", "DABD", 0, 1},
79 {ERE
, "(A|\\b)(A|B|C)", "AD", 0, 0},
80 {ERE
, "(A|\\b)(A|B|C)", "C!", 0, 0},
81 {ERE
, "(A|\\b)(A|B|C)", "D,B", 0, 2},
82 {ERE
, "(A|\\b)(A|B|C)", "DA?A", 0, 3},
83 {ERE
, "(A|\\b)(A|B|C)", "BBC", 0, 0},
84 {ERE
, "(A|\\b)(A|B|C)", "DA", 0, -1},
85 {ERE
, "(!|\\b)(!|=|~)", "A!=\\", 0, 1},
86 {ERE
, "(!|\\b)(!|=|~)", "/!=A", 0, 1},
87 {ERE
, "(!|\\b)(!|=|~)", "A=A", 0, 1},
88 {ERE
, "(!|\\b)(!|=|~)", "==!=", 0, 2},
89 {ERE
, "(!|\\b)(!|=|~)", "==C~", 0, 3},
90 {ERE
, "(!|\\b)(!|=|~)", "=~=", 0, -1},
91 {ERE
, "(!|\\b)(!|=|~)", "~!", 0, -1},
92 {ERE
, "(!|\\b)(!|=|~)", "~=~", 0, -1},
93 {ERE
, "(\\b|A.)[ABC]", "AC", 0, 0},
94 {ERE
, "(\\b|A.)[ABC]", "=A", 0, 1},
95 {ERE
, "(\\b|A.)[ABC]", "DACC", 0, 1},
96 {ERE
, "(\\b|A.)[A~C]", "AC", 0, 0},
97 {ERE
, "(\\b|A.)[A~C]", "=A", 0, 1},
98 {ERE
, "(\\b|A.)[A~C]", "DACC", 0, 1},
99 {ERE
, "(\\b|A.)[A~C]", "B!A=", 0, 2},
100 {ERE
, "(\\b|A.)[A~C]", "B~C", 0, 1},
101 {ERE
, ".\\b.", "AA~", 0, 1},
102 {ERE
, ".\\b.", "=A=", 0, 0},
103 {ERE
, ".\\b.", "==", 0, -1},
104 {ERE
, ".\\b.", "ABA", 0, -1},
105 {ERE
, "[^k]\\b[^k]", "AA~", 0, 1},
106 {ERE
, "[^k]\\b[^k]", "=A=", 0, 0},
107 {ERE
, "[^k]\\b[^k]", "Ak~kA~", 0, 4},
108 {ERE
, "[^k]\\b[^k]", "==", 0, -1},
109 {ERE
, "[^k]\\b[^k]", "ABA", 0, -1},
110 {ERE
, "[^k]\\b[^k]", "Ak~", 0, -1},
111 {ERE
, "[^k]\\b[^k]", "k=k", 0, -1},
112 {ERE
, "[^C]\\b[^C]", "AA~", 0, 1},
113 {ERE
, "[^C]\\b[^C]", "=A=", 0, 0},
114 {ERE
, "[^C]\\b[^C]", "AC~CA~", 0, 4},
115 {ERE
, "[^C]\\b[^C]", "==", 0, -1},
116 {ERE
, "[^C]\\b[^C]", "ABA", 0, -1},
117 {ERE
, "[^C]\\b[^C]", "AC~", 0, -1},
118 {ERE
, "[^C]\\b[^C]", "C=C", 0, -1},
119 {ERE
, "\\<(A|!|.B)", "A=AC", 0, 0},
120 {ERE
, "\\<(A|!|.B)", "=AC", 0, 1},
121 {ERE
, "\\<(A|!|.B)", "!AC", 0, 1},
122 {ERE
, "\\<(A|!|.B)", "=AB", 0, 1},
123 {ERE
, "\\<(A|!|.B)", "=CB", 0, 1},
124 {ERE
, "\\<(A|!|.B)", "!CB", 0, 1},
125 {ERE
, "\\<(A|!|.B)", "DA!C", 0, -1},
126 {ERE
, "\\<(A|!|.B)", "D,B", 0, -1},
127 {ERE
, "\\<(A|!|.B)", "!.C", 0, -1},
128 {ERE
, "\\<(A|!|.B)", "BCB", 0, -1},
129 {ERE
, "(A|\\<)(A|B|C)", "DAAD", 0, 1},
130 {ERE
, "(A|\\<)(A|B|C)", "DABD", 0, 1},
131 {ERE
, "(A|\\<)(A|B|C)", "AD", 0, 0},
132 {ERE
, "(A|\\<)(A|B|C)", "C!", 0, 0},
133 {ERE
, "(A|\\<)(A|B|C)", "D,B", 0, 2},
134 {ERE
, "(A|\\<)(A|B|C)", "DA?A", 0, 3},
135 {ERE
, "(A|\\<)(A|B|C)", "BBC", 0, 0},
136 {ERE
, "(A|\\<)(A|B|C)", "DA", 0, -1},
137 {ERE
, "(!|\\<)(!|=|~)", "A!=\\", 0, 1},
138 {ERE
, "(!|\\<)(!|=|~)", "/!=A", 0, 1},
139 {ERE
, "(!|\\<)(!|=|~)", "==!=", 0, 2},
140 {ERE
, "(!|\\<)(!|=|~)", "==C~", 0, -1},
141 {ERE
, "(!|\\<)(!|=|~)", "A=A", 0, -1},
142 {ERE
, "(!|\\<)(!|=|~)", "=~=", 0, -1},
143 {ERE
, "(!|\\<)(!|=|~)", "~!", 0, -1},
144 {ERE
, "(!|\\<)(!|=|~)", "~=~", 0, -1},
145 {ERE
, "(\\<|A.)[ABC]", "AC", 0, 0},
146 {ERE
, "(\\<|A.)[ABC]", "=A", 0, 1},
147 {ERE
, "(\\<|A.)[ABC]", "DACC", 0, 1},
148 {ERE
, "(\\<|A.)[A~C]", "AC", 0, 0},
149 {ERE
, "(\\<|A.)[A~C]", "=A", 0, 1},
150 {ERE
, "(\\<|A.)[A~C]", "DACC", 0, 1},
151 {ERE
, "(\\<|A.)[A~C]", "B!A=", 0, 2},
152 {ERE
, "(\\<|A.)[A~C]", "B~C", 0, 2},
153 {ERE
, ".\\<.", "=A=", 0, 0},
154 {ERE
, ".\\<.", "AA~", 0, -1},
155 {ERE
, ".\\<.", "==", 0, -1},
156 {ERE
, ".\\<.", "ABA", 0, -1},
157 {ERE
, "[^k]\\<[^k]", "=k=A=", 0, 2},
158 {ERE
, "[^k]\\<[^k]", "kk~", 0, -1},
159 {ERE
, "[^k]\\<[^k]", "==", 0, -1},
160 {ERE
, "[^k]\\<[^k]", "ABA", 0, -1},
161 {ERE
, "[^k]\\<[^k]", "=k=", 0, -1},
162 {ERE
, "[^C]\\<[^C]", "=C=A=", 0, 2},
163 {ERE
, "[^C]\\<[^C]", "CC~", 0, -1},
164 {ERE
, "[^C]\\<[^C]", "==", 0, -1},
165 {ERE
, "[^C]\\<[^C]", "ABA", 0, -1},
166 {ERE
, "[^C]\\<[^C]", "=C=", 0, -1},
167 {ERE
, ".\\B.", "ABA", 0, 0},
168 {ERE
, ".\\B.", "=BDC", 0, 1},
169 {ERE
, "[^k]\\B[^k]", "kkkABA", 0, 3},
170 {ERE
, "[^k]\\B[^k]", "kBk", 0, -1},
171 {ERE
, "[^C]\\B[^C]", "CCCABA", 0, 3},
172 {ERE
, "[^C]\\B[^C]", "CBC", 0, -1},
173 {ERE
, ".(\\b|\\B).", "=~AB", 0, 0},
174 {ERE
, ".(\\b|\\B).", "A=C", 0, 0},
175 {ERE
, ".(\\b|\\B).", "ABC", 0, 0},
176 {ERE
, ".(\\b|\\B).", "=~\\!", 0, 0},
177 {ERE
, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0},
178 {ERE
, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
179 {ERE
, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
180 {ERE
, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0},
181 {ERE
, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0},
182 {ERE
, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0},
183 {ERE
, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0},
184 {ERE
, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
185 {ERE
, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
186 {ERE
, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0},
187 {ERE
, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0},
188 {ERE
, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0},
189 {ERE
, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
190 {ERE
, "\\b([A]|[!]|.B)", "=AC", 0, 1},
191 {ERE
, "\\b([A]|[!]|.B)", "!AC", 0, 1},
192 {ERE
, "\\b([A]|[!]|.B)", "=AB", 0, 1},
193 {ERE
, "\\b([A]|[!]|.B)", "DA!C", 0, 2},
194 {ERE
, "\\b([A]|[!]|.B)", "=CB", 0, 1},
195 {ERE
, "\\b([A]|[!]|.B)", "!CB", 0, 1},
196 {ERE
, "\\b([A]|[!]|.B)", "D,B", 0, 1},
197 {ERE
, "\\b([A]|[!]|.B)", "!.C", 0, -1},
198 {ERE
, "\\b([A]|[!]|.B)", "BCB", 0, -1},
199 {ERE
, "([A]|\\b)([A]|[B]|[C])", "DAAD", 0, 1},
200 {ERE
, "([A]|\\b)([A]|[B]|[C])", "DABD", 0, 1},
201 {ERE
, "([A]|\\b)([A]|[B]|[C])", "AD", 0, 0},
202 {ERE
, "([A]|\\b)([A]|[B]|[C])", "C!", 0, 0},
203 {ERE
, "([A]|\\b)([A]|[B]|[C])", "D,B", 0, 2},
204 {ERE
, "([A]|\\b)([A]|[B]|[C])", "DA?A", 0, 3},
205 {ERE
, "([A]|\\b)([A]|[B]|[C])", "BBC", 0, 0},
206 {ERE
, "([A]|\\b)([A]|[B]|[C])", "DA", 0, -1},
207 {ERE
, "([!]|\\b)([!]|[=]|[~])", "A!=\\", 0, 1},
208 {ERE
, "([!]|\\b)([!]|[=]|[~])", "/!=A", 0, 1},
209 {ERE
, "([!]|\\b)([!]|[=]|[~])", "A=A", 0, 1},
210 {ERE
, "([!]|\\b)([!]|[=]|[~])", "==!=", 0, 2},
211 {ERE
, "([!]|\\b)([!]|[=]|[~])", "==C~", 0, 3},
212 {ERE
, "([!]|\\b)([!]|[=]|[~])", "=~=", 0, -1},
213 {ERE
, "([!]|\\b)([!]|[=]|[~])", "~!", 0, -1},
214 {ERE
, "([!]|\\b)([!]|[=]|[~])", "~=~", 0, -1},
215 {ERE
, "\\<([A]|[!]|.B)", "A=AC", 0, 0},
216 {ERE
, "\\<([A]|[!]|.B)", "=AC", 0, 1},
217 {ERE
, "\\<([A]|[!]|.B)", "!AC", 0, 1},
218 {ERE
, "\\<([A]|[!]|.B)", "=AB", 0, 1},
219 {ERE
, "\\<([A]|[!]|.B)", "=CB", 0, 1},
220 {ERE
, "\\<([A]|[!]|.B)", "!CB", 0, 1},
221 {ERE
, "\\<([A]|[!]|.B)", "DA!C", 0, -1},
222 {ERE
, "\\<([A]|[!]|.B)", "D,B", 0, -1},
223 {ERE
, "\\<([A]|[!]|.B)", "!.C", 0, -1},
224 {ERE
, "\\<([A]|[!]|.B)", "BCB", 0, -1},
225 {ERE
, "([A]|\\<)([A]|[B]|[C])", "DAAD", 0, 1},
226 {ERE
, "([A]|\\<)([A]|[B]|[C])", "DABD", 0, 1},
227 {ERE
, "([A]|\\<)([A]|[B]|[C])", "AD", 0, 0},
228 {ERE
, "([A]|\\<)([A]|[B]|[C])", "C!", 0, 0},
229 {ERE
, "([A]|\\<)([A]|[B]|[C])", "D,B", 0, 2},
230 {ERE
, "([A]|\\<)([A]|[B]|[C])", "DA?A", 0, 3},
231 {ERE
, "([A]|\\<)([A]|[B]|[C])", "BBC", 0, 0},
232 {ERE
, "([A]|\\<)([A]|[B]|[C])", "DA", 0, -1},
233 {ERE
, "([!]|\\<)([!=]|[~])", "A!=\\", 0, 1},
234 {ERE
, "([!]|\\<)([!=]|[~])", "/!=A", 0, 1},
235 {ERE
, "([!]|\\<)([!=]|[~])", "==!=", 0, 2},
236 {ERE
, "([!]|\\<)([!=]|[~])", "==C~", 0, -1},
237 {ERE
, "([!]|\\<)([!=]|[~])", "A=A", 0, -1},
238 {ERE
, "([!]|\\<)([!=]|[~])", "=~=", 0, -1},
239 {ERE
, "([!]|\\<)([!=]|[~])", "~!", 0, -1},
240 {ERE
, "([!]|\\<)([!=]|[~])", "~=~", 0, -1},
241 {ERE
, "(\\<|[A].)[ABC]", "AC", 0, 0},
242 {ERE
, "(\\<|[A].)[ABC]", "=A", 0, 1},
243 {ERE
, "(\\<|[A].)[ABC]", "DACC", 0, 1},
244 {ERE
, "(\\<|[A].)[A~C]", "AC", 0, 0},
245 {ERE
, "(\\<|[A].)[A~C]", "=A", 0, 1},
246 {ERE
, "(\\<|[A].)[A~C]", "DACC", 0, 1},
247 {ERE
, "(\\<|[A].)[A~C]", "B!A=", 0, 2},
248 {ERE
, "(\\<|[A].)[A~C]", "B~C", 0, 2},
249 {ERE
, "^[^A]*\\bB", "==B", 0, 0},
250 {ERE
, "^[^A]*\\bB", "CBD!=B", 0, 0},
251 {ERE
, "[^A]*\\bB", "==B", 2, 2}
255 do_one_test (const struct test_s
*test
, const char *fail
)
259 struct re_pattern_buffer regbuf
;
261 re_set_syntax (test
->syntax
);
262 memset (®buf
, '\0', sizeof (regbuf
));
263 err
= re_compile_pattern (test
->pattern
, strlen (test
->pattern
),
267 printf ("%sre_compile_pattern \"%s\" failed: %s\n", fail
, test
->pattern
,
272 res
= re_search (®buf
, test
->string
, strlen (test
->string
),
273 test
->start
, strlen (test
->string
) - test
->start
, NULL
);
274 if (res
!= test
->res
)
276 printf ("%sre_search \"%s\" \"%s\" failed: %d (expected %d)\n",
277 fail
, test
->pattern
, test
->string
, res
, test
->res
);
282 if (test
->res
> 0 && test
->start
== 0)
284 res
= re_search (®buf
, test
->string
, strlen (test
->string
),
285 test
->res
, strlen (test
->string
) - test
->res
, NULL
);
286 if (res
!= test
->res
)
288 printf ("%sre_search from expected \"%s\" \"%s\" failed: %d (expected %d)\n",
289 fail
, test
->pattern
, test
->string
, res
, test
->res
);
/* Write at P the UTF-8 encoding of the multibyte stand-in for the ASCII
   placeholder C and return a pointer just past the bytes written (two
   to four bytes, depending on C).  A character with no stand-in writes
   nothing and returns P unchanged.  The caller must provide room for
   up to four bytes at P.  */
static char *
replace (char *p, char c)
{
  switch (c)
    {
      /* A -> LATIN CAPITAL LETTER A WITH DIAERESIS */
    case 'A': *p++ = '\xc3'; *p++ = '\x84'; break;
      /* B -> LATIN CAPITAL LETTER O WITH DIAERESIS */
    case 'B': *p++ = '\xc3'; *p++ = '\x96'; break;
      /* C -> LATIN CAPITAL LETTER U WITH DIAERESIS */
    case 'C': *p++ = '\xc3'; *p++ = '\x9c'; break;
      /* D -> LATIN SMALL LETTER A WITH DIAERESIS */
    case 'D': *p++ = '\xc3'; *p++ = '\xa4'; break;
      /* ! -> MULTIPLICATION SIGN */
    case '!': *p++ = '\xc3'; *p++ = '\x97'; break;
      /* = -> EM DASH */
    case '=': *p++ = '\xe2'; *p++ = '\x80'; *p++ = '\x94'; break;
      /* ~ -> MUSICAL SYMBOL HALF NOTE */
    case '~': *p++ = '\xf0'; *p++ = '\x9d'; *p++ = '\x85'; *p++ = '\x9e';
      break;
    default:
      break;
    }
  return p;
}
/* Re-run TEST with ASCII placeholder characters rewritten into
   multibyte UTF-8 sequences by replace (): build a rewritten pattern
   and string in local buffers, carry over the start offset, and hand
   the resulting test to do_one_test () with a diagnostic prefix that
   names the characters that were replaced.
   NOTE(review): several lines of this function are not visible in this
   view (declarations of i, j, p, repl and t, braces, return
   statements); the comments below describe only what is shown.  */
324 do_mb_tests (const struct test_s
*test
)
/* The seven placeholder characters that replace () knows how to
   rewrite.  */
328 const char *const chars
= "ABCD!=~";
/* replace () writes at most four bytes per input character, so four
   times the source length plus a terminator always suffices.  */
330 char pattern
[strlen (test
->pattern
) * 4 + 1];
331 char string
[strlen (test
->string
) * 4 + 1];
/* Room for the "UTF-8 " prefix (terminator included in the sizeof)
   plus eight further bytes for the replacement set appended below.  */
332 char fail
[8 + sizeof ("UTF-8 ")];
337 strcpy (fail
, "UTF-8 ");
/* NOTE(review): i presumably acts as a bitmask selecting a non-empty
   subset of the 7 placeholders (128 == 1 << 7), with repl holding the
   selected characters; the lines that establish this are not visible
   here -- confirm against the full file.  */
338 for (i
= 1; i
< 128; ++i
)
341 for (j
= 0; j
< 7; ++j
)
/* True when placeholder chars[j] occurs in neither the pattern nor
   the string of this test.  */
344 if (!strchr (test
->pattern
, chars
[j
])
345 && !strchr (test
->string
, chars
[j
]))
/* Build the rewritten pattern: characters listed in repl are expanded
   by replace (), everything else is copied through.  */
353 for (j
= 0, p
= pattern
; test
->pattern
[j
]; ++j
)
354 if (strchr (repl
, test
->pattern
[j
]))
355 p
= replace (p
, test
->pattern
[j
]);
/* A backslash followed by another character is copied as a pair, so
   the character after the backslash is never looked up in repl on the
   next iteration.  */
356 else if (test
->pattern
[j
] == '\\' && test
->pattern
[j
+ 1])
358 *p
++ = test
->pattern
[j
++];
359 *p
++ = test
->pattern
[j
];
362 *p
++ = test
->pattern
[j
];
/* The rewritten test starts from the original start offset; it is
   adjusted below as characters of the string grow.  */
365 t
.start
= test
->start
;
/* Build the rewritten string the same way as the pattern.  */
368 for (j
= 0, p
= string
; test
->string
[j
]; ++j
)
369 if (strchr (repl
, test
->string
[j
]))
371 char *d
= replace (p
, test
->string
[j
]);
/* One input byte became d - p output bytes, so the start offset moves
   by the d - p - 1 extra bytes.
   NOTE(review): whether this adjustment is guarded by a condition is
   not visible here -- confirm against the full file.  */
373 t
.start
+= d
- p
- 1;
379 *p
++ = test
->string
[j
];
/* Append the replacement set after the "UTF-8 " prefix so that
   diagnostics identify which characters were rewritten.  */
382 p
= stpcpy (fail
+ strlen ("UTF-8 "), repl
);
/* Run the rewritten test; a non-zero result means it failed.  */
386 if (do_one_test (&t
, fail
))
400 for (i
= 0; i
< sizeof (tests
) / sizeof (tests
[0]); ++i
)
402 if (setlocale (LC_ALL
, "de_DE.ISO-8859-1") == NULL
)
404 puts ("setlocale de_DE.ISO-8859-1 failed");
407 ret
|= do_one_test (&tests
[i
], "");
408 if (setlocale (LC_ALL
, "de_DE.UTF-8") == NULL
)
410 puts ("setlocale de_DE.UTF-8 failed");
413 ret
|= do_one_test (&tests
[i
], "UTF-8 ");
414 ret
|= do_mb_tests (&tests
[i
]);