* posix/regcomp.c (re_compile_fastmap_iter): Use __mbrtowc.
[glibc.git] / posix / tst-regex.c
bloba7fba698f208ca3f102c2ca0e68f6f41ff4c77b1
/* Copyright (C) 2001, 2003, 2008 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
19 #include <spawn.h>
20 #include "spawn_int.h"
22 #include <assert.h>
23 #include <errno.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <getopt.h>
27 #include <iconv.h>
28 #include <locale.h>
29 #include <mcheck.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <regex.h>
/* State shared between the test driver and its helpers.  */

/* Set to 1 by the --timing command line option.  */
static int timing;

/* Contents of the input file as read from disk, and its length in
   bytes (excluding the terminating NUL).  */
static char *mem;
static size_t memlen;

/* UTF-8 conversion of MEM, and its length in bytes.  */
static char *umem;
static size_t umemlen;

/* Conversion descriptor used for ISO-8859-1 -> UTF-8 conversions.  */
static iconv_t cd;

#if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
/* Non-zero while the CPU-time clock CL is usable for measurements.  */
static int use_clock;
static clockid_t cl;
#endif

/* Forward declarations of the helpers defined further down.  */
static int run_test (const char *expr, const char *mem, size_t memlen,
                     int icase, int expected);
static int run_test_backwards (const char *expr, const char *mem,
                               size_t memlen, int icase, int expected);
static int test_expr (const char *expr, int expected, int expectedicase);
58 static int
59 do_test (void)
61 const char *file;
62 int fd;
63 struct stat st;
64 int result;
65 char *inmem;
66 char *outmem;
67 size_t inlen;
68 size_t outlen;
70 mtrace ();
72 /* Make the content of the file available in memory. */
73 file = "../ChangeLog.8";
74 fd = open (file, O_RDONLY);
75 if (fd == -1)
76 error (EXIT_FAILURE, errno, "cannot open %s", basename (file));
78 if (fstat (fd, &st) != 0)
79 error (EXIT_FAILURE, errno, "cannot stat %s", basename (file));
80 memlen = st.st_size;
82 mem = (char *) malloc (memlen + 1);
83 if (mem == NULL)
84 error (EXIT_FAILURE, errno, "while allocating buffer");
86 if ((size_t) read (fd, mem, memlen) != memlen)
87 error (EXIT_FAILURE, 0, "cannot read entire file");
88 mem[memlen] = '\0';
90 close (fd);
92 /* We have to convert a few things from Latin-1 to UTF-8. */
93 cd = iconv_open ("UTF-8", "ISO-8859-1");
94 if (cd == (iconv_t) -1)
95 error (EXIT_FAILURE, errno, "cannot get conversion descriptor");
97 /* For the second test we have to convert the file content to UTF-8.
98 Since the text is mostly ASCII it should be enough to allocate
99 twice as much memory for the UTF-8 text than for the Latin-1
100 text. */
101 umem = (char *) calloc (2, memlen);
102 if (umem == NULL)
103 error (EXIT_FAILURE, errno, "while allocating buffer");
105 inmem = mem;
106 inlen = memlen;
107 outmem = umem;
108 outlen = 2 * memlen - 1;
109 iconv (cd, &inmem, &inlen, &outmem, &outlen);
110 umemlen = outmem - umem;
111 if (inlen != 0)
112 error (EXIT_FAILURE, errno, "cannot convert buffer");
114 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
115 # if _POSIX_CPUTIME == 0
116 if (sysconf (_SC_CPUTIME) < 0)
117 use_clock = 0;
118 else
119 # endif
120 /* See whether we can use the CPU clock. */
121 use_clock = clock_getcpuclockid (0, &cl) == 0;
122 #endif
124 #ifdef DEBUG
125 re_set_syntax (RE_DEBUG);
126 #endif
128 /* Run the actual tests. All tests are run in a single-byte and a
129 multi-byte locale. */
130 result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 2, 2);
131 result |= test_expr ("G.ran", 2, 3);
132 result |= test_expr ("G.\\{1\\}ran", 2, 3);
133 result |= test_expr ("G.*ran", 3, 44);
134 result |= test_expr ("[äáàâ]", 0, 0);
135 result |= test_expr ("Uddeborg", 2, 2);
136 result |= test_expr (".Uddeborg", 2, 2);
138 /* Free the resources. */
139 free (umem);
140 iconv_close (cd);
141 free (mem);
143 return result;
147 static int
148 test_expr (const char *expr, int expected, int expectedicase)
150 int result;
151 char *inmem;
152 char *outmem;
153 size_t inlen;
154 size_t outlen;
155 char *uexpr;
157 /* First test: search with an ISO-8859-1 locale. */
158 if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
159 error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
161 printf ("\nTest \"%s\" with 8-bit locale\n", expr);
162 result = run_test (expr, mem, memlen, 0, expected);
163 printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
164 result |= run_test (expr, mem, memlen, 1, expectedicase);
165 printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
166 result |= run_test_backwards (expr, mem, memlen, 0, expected);
167 printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
168 expr);
169 result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
171 /* Second test: search with an UTF-8 locale. */
172 if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
173 error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8");
175 inmem = (char *) expr;
176 inlen = strlen (expr);
177 outlen = inlen * MB_CUR_MAX;
178 outmem = uexpr = alloca (outlen + 1);
179 memset (outmem, '\0', outlen + 1);
180 iconv (cd, &inmem, &inlen, &outmem, &outlen);
181 if (inlen != 0)
182 error (EXIT_FAILURE, errno, "cannot convert expression");
184 /* Run the tests. */
185 printf ("\nTest \"%s\" with multi-byte locale\n", expr);
186 result |= run_test (uexpr, umem, umemlen, 0, expected);
187 printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
188 result |= run_test (uexpr, umem, umemlen, 1, expectedicase);
189 printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
190 result |= run_test_backwards (uexpr, umem, umemlen, 0, expected);
191 printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
192 expr);
193 result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase);
195 return result;
199 static int
200 run_test (const char *expr, const char *mem, size_t memlen, int icase,
201 int expected)
203 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
204 struct timespec start;
205 struct timespec finish;
206 #endif
207 regex_t re;
208 int err;
209 size_t offset;
210 int cnt;
212 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
213 if (use_clock && !timing)
214 use_clock = clock_gettime (cl, &start) == 0;
215 #endif
217 err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
218 if (err != REG_NOERROR)
220 char buf[200];
221 regerror (err, &re, buf, sizeof buf);
222 error (EXIT_FAILURE, 0, "cannot compile expression: %s", buf);
225 cnt = 0;
226 offset = 0;
227 assert (mem[memlen] == '\0');
228 while (offset < memlen)
230 regmatch_t ma[1];
231 const char *sp;
232 const char *ep;
234 err = regexec (&re, mem + offset, 1, ma, 0);
235 if (err == REG_NOMATCH)
236 break;
238 if (err != REG_NOERROR)
240 char buf[200];
241 regerror (err, &re, buf, sizeof buf);
242 error (EXIT_FAILURE, 0, "cannot use expression: %s", buf);
245 assert (ma[0].rm_so >= 0);
246 sp = mem + offset + ma[0].rm_so;
247 while (sp > mem && sp[-1] != '\n')
248 --sp;
250 ep = mem + offset + ma[0].rm_so;
251 while (*ep != '\0' && *ep != '\n')
252 ++ep;
254 printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
256 offset = ep + 1 - mem;
259 regfree (&re);
261 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
262 if (use_clock && !timing)
264 use_clock = clock_gettime (cl, &finish) == 0;
265 if (use_clock)
267 if (finish.tv_nsec < start.tv_nsec)
269 finish.tv_nsec -= start.tv_nsec - 1000000000;
270 finish.tv_sec -= 1 + start.tv_sec;
272 else
274 finish.tv_nsec -= start.tv_nsec;
275 finish.tv_sec -= start.tv_sec;
278 printf ("elapsed time: %ld.%09ld sec\n",
279 finish.tv_sec, finish.tv_nsec);
283 if (use_clock && timing)
285 struct timespec mintime = { .tv_sec = 24 * 60 * 60 };
287 for (int i = 0; i < 10; ++i)
289 offset = 0;
290 use_clock = clock_gettime (cl, &start) == 0;
292 if (!use_clock)
293 continue;
295 err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
296 if (err != REG_NOERROR)
297 continue;
299 while (offset < memlen)
301 regmatch_t ma[1];
303 err = regexec (&re, mem + offset, 1, ma, 0);
304 if (err != REG_NOERROR)
305 break;
307 offset += ma[0].rm_eo;
310 regfree (&re);
312 use_clock = clock_gettime (cl, &finish) == 0;
313 if (use_clock)
315 if (finish.tv_nsec < start.tv_nsec)
317 finish.tv_nsec -= start.tv_nsec - 1000000000;
318 finish.tv_sec -= 1 + start.tv_sec;
320 else
322 finish.tv_nsec -= start.tv_nsec;
323 finish.tv_sec -= start.tv_sec;
325 if (finish.tv_sec < mintime.tv_sec
326 || (finish.tv_sec == mintime.tv_sec
327 && finish.tv_nsec < mintime.tv_nsec))
328 mintime = finish;
331 printf ("elapsed time: %ld.%09ld sec\n",
332 mintime.tv_sec, mintime.tv_nsec);
334 #endif
336 /* Return an error if the number of matches found is not match we
337 expect. */
338 return cnt != expected;
342 static int
343 run_test_backwards (const char *expr, const char *mem, size_t memlen,
344 int icase, int expected)
346 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
347 struct timespec start;
348 struct timespec finish;
349 #endif
350 struct re_pattern_buffer re;
351 const char *err;
352 size_t offset;
353 int cnt;
355 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
356 if (use_clock && !timing)
357 use_clock = clock_gettime (cl, &start) == 0;
358 #endif
360 re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE)
361 | RE_HAT_LISTS_NOT_NEWLINE
362 | (icase ? RE_ICASE : 0));
364 memset (&re, 0, sizeof (re));
365 re.fastmap = malloc (256);
366 if (re.fastmap == NULL)
367 error (EXIT_FAILURE, errno, "cannot allocate fastmap");
369 err = re_compile_pattern (expr, strlen (expr), &re);
370 if (err != NULL)
371 error (EXIT_FAILURE, 0, "cannot compile expression: %s", err);
373 if (re_compile_fastmap (&re))
374 error (EXIT_FAILURE, 0, "couldn't compile fastmap");
376 cnt = 0;
377 offset = memlen;
378 assert (mem[memlen] == '\0');
379 while (offset <= memlen)
381 int start;
382 const char *sp;
383 const char *ep;
385 start = re_search (&re, mem, memlen, offset, -offset, NULL);
386 if (start == -1)
387 break;
389 if (start == -2)
390 error (EXIT_FAILURE, 0, "internal error in re_search");
392 sp = mem + start;
393 while (sp > mem && sp[-1] != '\n')
394 --sp;
396 ep = mem + start;
397 while (*ep != '\0' && *ep != '\n')
398 ++ep;
400 printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
402 offset = sp - 1 - mem;
405 regfree (&re);
407 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
408 if (use_clock && !timing)
410 use_clock = clock_gettime (cl, &finish) == 0;
411 if (use_clock)
413 if (finish.tv_nsec < start.tv_nsec)
415 finish.tv_nsec -= start.tv_nsec - 1000000000;
416 finish.tv_sec -= 1 + start.tv_sec;
418 else
420 finish.tv_nsec -= start.tv_nsec;
421 finish.tv_sec -= start.tv_sec;
424 printf ("elapsed time: %ld.%09ld sec\n",
425 finish.tv_sec, finish.tv_nsec);
429 if (use_clock && timing)
431 struct timespec mintime = { .tv_sec = 24 * 60 * 60 };
433 for (int i = 0; i < 10; ++i)
435 offset = memlen;
436 use_clock = clock_gettime (cl, &start) == 0;
438 if (!use_clock)
439 continue;
441 memset (&re, 0, sizeof (re));
442 re.fastmap = malloc (256);
443 if (re.fastmap == NULL)
444 continue;
446 err = re_compile_pattern (expr, strlen (expr), &re);
447 if (err != NULL)
448 continue;
450 if (re_compile_fastmap (&re))
452 regfree (&re);
453 continue;
456 while (offset <= memlen)
458 int start;
459 const char *sp;
461 start = re_search (&re, mem, memlen, offset, -offset, NULL);
462 if (start < -1)
463 break;
465 sp = mem + start;
466 while (sp > mem && sp[-1] != '\n')
467 --sp;
469 offset = sp - 1 - mem;
472 regfree (&re);
474 use_clock = clock_gettime (cl, &finish) == 0;
475 if (use_clock)
477 if (finish.tv_nsec < start.tv_nsec)
479 finish.tv_nsec -= start.tv_nsec - 1000000000;
480 finish.tv_sec -= 1 + start.tv_sec;
482 else
484 finish.tv_nsec -= start.tv_nsec;
485 finish.tv_sec -= start.tv_sec;
487 if (finish.tv_sec < mintime.tv_sec
488 || (finish.tv_sec == mintime.tv_sec
489 && finish.tv_nsec < mintime.tv_nsec))
490 mintime = finish;
493 printf ("elapsed time: %ld.%09ld sec\n",
494 mintime.tv_sec, mintime.tv_nsec);
496 #endif
498 /* Return an error if the number of matches found is not match we
499 expect. */
500 return cnt != expected;
503 /* If --timing is used we will need a larger timout. */
504 #define TIMEOUT 50
505 #define CMDLINE_OPTIONS \
506 {"timing", no_argument, &timing, 1 },
507 #define TEST_FUNCTION do_test ()
508 #include "../test-skeleton.c"