1 /* Copyright (C) 2001-2023 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
35 #include <support/support.h>
38 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
/* Number of bytes in the ISO-8859-1 copy of the input that is kept in
   umem.  It is computed from the output pointer once the file content
   has been run through iconv.  */
static size_t umemlen;
/* Run the forward and backward searches for EXPR, each both
   case-sensitive and case-insensitive, in the C.UTF-8, de_DE.UTF-8 and
   de_DE.ISO-8859-1 locales.  EXPECTED is the match count handed to the
   case-sensitive runs and EXPECTEDICASE the one handed to the
   case-insensitive runs.  */
static int test_expr (const char *expr, int expected, int expectedicase);
/* Compile EXPR with regcomp (REG_NEWLINE, plus REG_ICASE when ICASE is
   nonzero) and repeatedly run regexec over the MEMLEN bytes at MEM,
   which must be NUL-terminated at MEM[MEMLEN].  The result is nonzero
   when the number of matches found differs from EXPECTED.  */
static int run_test (const char *expr, const char *mem, size_t memlen,
                     int icase, int expected);
/* Counterpart of run_test built on the GNU interface: EXPR is compiled
   with re_compile_pattern and the MEMLEN bytes at MEM are scanned with
   re_search using a negative range.  ICASE selects RE_ICASE.  The
   result is nonzero when the number of matches found differs from
   EXPECTED.  */
static int run_test_backwards (const char *expr, const char *mem,
                               size_t memlen, int icase, int expected);
70 /* Make the content of the file available in memory. */
71 file
= "./tst-regex.input";
72 fd
= open (file
, O_RDONLY
);
74 error (EXIT_FAILURE
, errno
, "cannot open %s", basename (file
));
76 if (fstat (fd
, &st
) != 0)
77 error (EXIT_FAILURE
, errno
, "cannot stat %s", basename (file
));
80 mem
= (char *) malloc (memlen
+ 1);
82 error (EXIT_FAILURE
, errno
, "while allocating buffer");
84 if ((size_t) read (fd
, mem
, memlen
) != memlen
)
85 error (EXIT_FAILURE
, 0, "cannot read entire file");
90 /* We have to convert a few things from UTF-8 to Latin-1. */
91 cd
= iconv_open ("ISO-8859-1", "UTF-8");
92 if (cd
== (iconv_t
) -1)
93 error (EXIT_FAILURE
, errno
, "cannot get conversion descriptor");
95 /* For the ISO-8859-1 test we have to convert the file content to Latin-1.
96 This cannot grow the data. */
97 umem
= (char *) malloc (memlen
+ 1);
99 error (EXIT_FAILURE
, errno
, "while allocating buffer");
105 iconv (cd
, &inmem
, &inlen
, &outmem
, &outlen
);
106 umemlen
= outmem
- umem
;
108 error (EXIT_FAILURE
, errno
, "cannot convert buffer");
109 umem
[umemlen
] = '\0';
111 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
112 # if _POSIX_CPUTIME == 0
113 if (sysconf (_SC_CPUTIME
) < 0)
117 /* See whether we can use the CPU clock. */
118 use_clock
= clock_getcpuclockid (0, &cl
) == 0;
122 re_set_syntax (RE_DEBUG
);
125 /* Run the actual tests. All tests are run in a single-byte and a
126 multi-byte locale. */
127 result
|= test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
128 result
|= test_expr ("G.ran", 2, 3);
129 result
|= test_expr ("G.\\{1\\}ran", 2, 3);
130 result
|= test_expr ("G.*ran", 3, 44);
131 result
|= test_expr ("[äáàâ]", 0, 0);
132 result
|= test_expr ("Uddeborg", 2, 2);
133 result
|= test_expr (".Uddeborg", 2, 2);
135 /* Free the resources. */
145 test_expr (const char *expr
, int expected
, int expectedicase
)
154 /* First test: search with basic C.UTF-8 locale. */
155 printf ("INFO: Testing C.UTF-8.\n");
156 xsetlocale (LC_ALL
, "C.UTF-8");
158 printf ("\nTest \"%s\" with multi-byte locale\n", expr
);
159 result
|= run_test (expr
, mem
, memlen
, 0, expected
);
160 printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr
);
161 result
|= run_test (expr
, mem
, memlen
, 1, expectedicase
);
162 printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr
);
163 result
|= run_test_backwards (expr
, mem
, memlen
, 0, expected
);
164 printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
166 result
|= run_test_backwards (expr
, mem
, memlen
, 1, expectedicase
);
168 /* Second test: search with the de_DE.UTF-8 locale. */
169 printf ("INFO: Testing de_DE.UTF-8.\n");
170 xsetlocale (LC_ALL
, "de_DE.UTF-8");
172 printf ("\nTest \"%s\" with multi-byte locale\n", expr
);
173 result
|= run_test (expr
, mem
, memlen
, 0, expected
);
174 printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr
);
175 result
|= run_test (expr
, mem
, memlen
, 1, expectedicase
);
176 printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr
);
177 result
|= run_test_backwards (expr
, mem
, memlen
, 0, expected
);
178 printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
180 result
|= run_test_backwards (expr
, mem
, memlen
, 1, expectedicase
);
182 /* Third test: search with an ISO-8859-1 locale. */
183 printf ("INFO: Testing de_DE.ISO-8859-1.\n");
184 xsetlocale (LC_ALL
, "de_DE.ISO-8859-1");
186 inmem
= (char *) expr
;
187 inlen
= strlen (expr
);
189 outmem
= uexpr
= alloca (outlen
+ 1);
190 memset (outmem
, '\0', outlen
+ 1);
191 iconv (cd
, &inmem
, &inlen
, &outmem
, &outlen
);
193 error (EXIT_FAILURE
, errno
, "cannot convert expression");
196 printf ("\nTest \"%s\" with 8-bit locale\n", expr
);
197 result
|= run_test (uexpr
, umem
, umemlen
, 0, expected
);
198 printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr
);
199 result
|= run_test (uexpr
, umem
, umemlen
, 1, expectedicase
);
200 printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr
);
201 result
|= run_test_backwards (uexpr
, umem
, umemlen
, 0, expected
);
202 printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
204 result
|= run_test_backwards (uexpr
, umem
, umemlen
, 1, expectedicase
);
211 run_test (const char *expr
, const char *mem
, size_t memlen
, int icase
,
214 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
215 struct timespec start
;
216 struct timespec finish
;
223 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
224 if (use_clock
&& !timing
)
225 use_clock
= clock_gettime (cl
, &start
) == 0;
228 err
= regcomp (&re
, expr
, REG_NEWLINE
| (icase
? REG_ICASE
: 0));
229 if (err
!= REG_NOERROR
)
232 regerror (err
, &re
, buf
, sizeof buf
);
233 error (EXIT_FAILURE
, 0, "cannot compile expression: %s", buf
);
238 assert (mem
[memlen
] == '\0');
239 while (offset
< memlen
)
245 err
= regexec (&re
, mem
+ offset
, 1, ma
, 0);
246 if (err
== REG_NOMATCH
)
249 if (err
!= REG_NOERROR
)
252 regerror (err
, &re
, buf
, sizeof buf
);
253 error (EXIT_FAILURE
, 0, "cannot use expression: %s", buf
);
256 assert (ma
[0].rm_so
>= 0);
257 sp
= mem
+ offset
+ ma
[0].rm_so
;
258 while (sp
> mem
&& sp
[-1] != '\n')
261 ep
= mem
+ offset
+ ma
[0].rm_so
;
262 while (*ep
!= '\0' && *ep
!= '\n')
265 printf ("match %d: \"%.*s\"\n", ++cnt
, (int) (ep
- sp
), sp
);
267 offset
= ep
+ 1 - mem
;
272 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
273 if (use_clock
&& !timing
)
275 use_clock
= clock_gettime (cl
, &finish
) == 0;
278 if (finish
.tv_nsec
< start
.tv_nsec
)
280 finish
.tv_nsec
-= start
.tv_nsec
- 1000000000;
281 finish
.tv_sec
-= 1 + start
.tv_sec
;
285 finish
.tv_nsec
-= start
.tv_nsec
;
286 finish
.tv_sec
-= start
.tv_sec
;
289 printf ("elapsed time: %jd.%09jd sec\n",
290 (intmax_t) finish
.tv_sec
, (intmax_t) finish
.tv_nsec
);
294 if (use_clock
&& timing
)
296 struct timespec mintime
= { .tv_sec
= 24 * 60 * 60 };
298 for (int i
= 0; i
< 10; ++i
)
301 use_clock
= clock_gettime (cl
, &start
) == 0;
306 err
= regcomp (&re
, expr
, REG_NEWLINE
| (icase
? REG_ICASE
: 0));
307 if (err
!= REG_NOERROR
)
310 while (offset
< memlen
)
314 err
= regexec (&re
, mem
+ offset
, 1, ma
, 0);
315 if (err
!= REG_NOERROR
)
318 offset
+= ma
[0].rm_eo
;
323 use_clock
= clock_gettime (cl
, &finish
) == 0;
326 if (finish
.tv_nsec
< start
.tv_nsec
)
328 finish
.tv_nsec
-= start
.tv_nsec
- 1000000000;
329 finish
.tv_sec
-= 1 + start
.tv_sec
;
333 finish
.tv_nsec
-= start
.tv_nsec
;
334 finish
.tv_sec
-= start
.tv_sec
;
336 if (finish
.tv_sec
< mintime
.tv_sec
337 || (finish
.tv_sec
== mintime
.tv_sec
338 && finish
.tv_nsec
< mintime
.tv_nsec
))
342 printf ("elapsed time: %jd.%09jd sec\n",
343 (intmax_t) mintime
.tv_sec
, (intmax_t) mintime
.tv_nsec
);
347 /* Return an error if the number of matches found is not what we expect. */
349 return cnt
!= expected
;
354 run_test_backwards (const char *expr
, const char *mem
, size_t memlen
,
355 int icase
, int expected
)
357 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
358 struct timespec start
;
359 struct timespec finish
;
361 struct re_pattern_buffer re
;
366 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
367 if (use_clock
&& !timing
)
368 use_clock
= clock_gettime (cl
, &start
) == 0;
371 re_set_syntax ((RE_SYNTAX_POSIX_BASIC
& ~RE_DOT_NEWLINE
)
372 | RE_HAT_LISTS_NOT_NEWLINE
373 | (icase
? RE_ICASE
: 0));
375 memset (&re
, 0, sizeof (re
));
376 re
.fastmap
= malloc (256);
377 if (re
.fastmap
== NULL
)
378 error (EXIT_FAILURE
, errno
, "cannot allocate fastmap");
380 err
= re_compile_pattern (expr
, strlen (expr
), &re
);
382 error (EXIT_FAILURE
, 0, "cannot compile expression: %s", err
);
384 if (re_compile_fastmap (&re
))
385 error (EXIT_FAILURE
, 0, "couldn't compile fastmap");
389 assert (mem
[memlen
] == '\0');
390 while (offset
<= memlen
)
396 start
= re_search (&re
, mem
, memlen
, offset
, -offset
, NULL
);
401 error (EXIT_FAILURE
, 0, "internal error in re_search");
404 while (sp
> mem
&& sp
[-1] != '\n')
408 while (*ep
!= '\0' && *ep
!= '\n')
411 printf ("match %d: \"%.*s\"\n", ++cnt
, (int) (ep
- sp
), sp
);
413 offset
= sp
- 1 - mem
;
418 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
419 if (use_clock
&& !timing
)
421 use_clock
= clock_gettime (cl
, &finish
) == 0;
424 if (finish
.tv_nsec
< start
.tv_nsec
)
426 finish
.tv_nsec
-= start
.tv_nsec
- 1000000000;
427 finish
.tv_sec
-= 1 + start
.tv_sec
;
431 finish
.tv_nsec
-= start
.tv_nsec
;
432 finish
.tv_sec
-= start
.tv_sec
;
435 printf ("elapsed time: %jd.%09jd sec\n",
436 (intmax_t) finish
.tv_sec
, (intmax_t) finish
.tv_nsec
);
440 if (use_clock
&& timing
)
442 struct timespec mintime
= { .tv_sec
= 24 * 60 * 60 };
444 for (int i
= 0; i
< 10; ++i
)
447 use_clock
= clock_gettime (cl
, &start
) == 0;
452 memset (&re
, 0, sizeof (re
));
453 re
.fastmap
= malloc (256);
454 if (re
.fastmap
== NULL
)
457 err
= re_compile_pattern (expr
, strlen (expr
), &re
);
461 if (re_compile_fastmap (&re
))
467 while (offset
<= memlen
)
472 start
= re_search (&re
, mem
, memlen
, offset
, -offset
, NULL
);
477 while (sp
> mem
&& sp
[-1] != '\n')
480 offset
= sp
- 1 - mem
;
485 use_clock
= clock_gettime (cl
, &finish
) == 0;
488 if (finish
.tv_nsec
< start
.tv_nsec
)
490 finish
.tv_nsec
-= start
.tv_nsec
- 1000000000;
491 finish
.tv_sec
-= 1 + start
.tv_sec
;
495 finish
.tv_nsec
-= start
.tv_nsec
;
496 finish
.tv_sec
-= start
.tv_sec
;
498 if (finish
.tv_sec
< mintime
.tv_sec
499 || (finish
.tv_sec
== mintime
.tv_sec
500 && finish
.tv_nsec
< mintime
.tv_nsec
))
504 printf ("elapsed time: %jd.%09jd sec\n",
505 (intmax_t) mintime
.tv_sec
, (intmax_t) mintime
.tv_nsec
);
509 /* Return an error if the number of matches found is not what we expect. */
511 return cnt
!= expected
;
514 /* If --timing is used we will need a larger timeout. */
/* Additional long option for the command line: --timing takes no
   argument and stores 1 in the timing flag.  */
#define CMDLINE_OPTIONS \
  {"timing", no_argument, &timing, 1 },
/* Expression the included test skeleton is expected to evaluate to run
   the test.  */
#define TEST_FUNCTION do_test ()
519 #include "../test-skeleton.c"