tgupdate: merge pcreposix-compat base into pcreposix-compat
[pcreposix-compat.git] / pcretest.c
blobf3e2867b28d6f2f59c4c6e0c954e0694d88629d8
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
/* Platform-dependent I/O configuration. INPUT_MODE and OUTPUT_MODE are the
file-open mode strings for input and output: Windows uses "r"/"wb", z/OS uses
"r"/"w", and every other system uses "rb"/"wb". The stray line-number prefixes
that made this section invalid C have been removed. */

#if defined(_WIN32) || defined(WIN32)

#  include <io.h>                /* For _setmode() */
#  include <fcntl.h>             /* For _O_BINARY */
#  define INPUT_MODE   "r"
#  define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore, though some environments
already define the plain names, hence the #ifndef guards. */

#  ifndef isatty
#    define isatty _isatty
#  endif

#  ifndef fileno
#    define fileno _fileno
#  endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#  ifdef __BORLANDC__
#    define _setmode(handle, mode) setmode(handle, mode)
#  endif

#else  /* Not Windows */

#  include <sys/time.h>          /* These two includes are needed */
#  include <sys/resource.h>      /* for setrlimit(). */

#  if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#    define INPUT_MODE   "r"
#    define OUTPUT_MODE  "w"
#  else
#    define INPUT_MODE   "rb"
#    define OUTPUT_MODE  "wb"
#  endif

#endif
/* VMS builds pull in <ssdef.h> and declare vms_setsymbol(), whose definition
is not in this part of the file. */

#ifdef __VMS
#include <ssdef.h>
void vms_setsymbol(char *, char *, int);
#endif

/* PRIV(name) expands to its argument unchanged, so a name wrapped in PRIV()
is used in this program exactly as written. */

#define PRIV(name) name
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
141 #include "pcre.h"
142 #include "pcre_internal.h"
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
/* Prototypes for the routines that print the internal form of a compiled
regex, one per supported library width. Each takes the compiled pattern, the
output stream, and a flag that is passed through as print_lengths. The stray
line-number prefixes have been removed so the declarations are valid C. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#ifdef SUPPORT_PCRE32
void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
163 #define PCRE_INCLUDED
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
/* PRINTABLE(c) is non-zero when c lies in a fixed range that does not depend
on the locale: 64..254 for EBCDIC builds, 32..126 otherwise. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) decides whether c may be output as-is. When locale_set is
non-zero it defers to isprint(), but only for values below 256 (larger values
are never treated as printable); otherwise it uses PRINTABLE(). The variable
locale_set is taken from the scope in which the macro is used. */

#define PRINTOK(c) \
  (locale_set ? (((c) < 256) && isprint(c)) : PRINTABLE(c))
182 /* Posix support is disabled in 16 or 32 bit only mode. */
/* When the 8-bit library is not supported, define NOPOSIX (unless it is
already defined) so that the POSIX interface tests are left out. */

#if !defined SUPPORT_PCRE8 && !defined NOPOSIX
#define NOPOSIX
#endif
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
189 Makefile. */
191 #if !defined NOPOSIX
192 #include "pcreposix.h"
193 #endif
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
/* If PCRE itself was built without UTF support, define NOUTF (unless it is
already defined) so that the UTF code in this program is cut out as well. */

#if !defined SUPPORT_UTF && !defined NOUTF
#define NOUTF
#endif
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208 only from one place and is handled differently). I couldn't dream up any way of
209 using a single macro to do this in a generic way, because of the many different
210 argument requirements. We know that at least one of SUPPORT_PCRE8,
211 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212 individual mode; then use these in the definitions of generic macros.
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217 offset is in units of this size. If the string were given as base+offset in one
218 argument, the casting might be incorrectly applied. */
220 #ifdef SUPPORT_PCRE8
/* 8-bit string helpers. PCHARS8 calls pchars() on the string that starts
"offset" 8-bit units after the base pointer "p" and stores the result in "lv";
PCHARSV8 makes the same call and discards the result. The base is cast first
and the offset is parenthesised, so an offset written as an expression (for
example a conditional) is added as a whole. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* Calls read_capture_name8() with the 8-bit name buffer (cn8) and stores the
returned pointer back in "p". The cn16 and cn32 arguments are accepted only so
that all widths share one argument list. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

/* Length of a zero-terminated 8-bit string, as an int. */

#define STRLEN8(p) ((int)strlen((char *)(p)))
/* 8-bit wrappers, first half. Each macro forwards to the pcre_xxx function or
variable of the same name; where a result is produced it is assigned to the
macro's first argument. Every argument that is the operand of a cast is
parenthesised so the cast applies to the whole argument expression. */

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define SET_PCRE_STACK_GUARD8(stack_guard) \
  pcre_stack_guard = stack_guard

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)(pat), options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(namesptr), cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)(bptr), offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)(bptr), len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)(bptr), len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(getnamesptr), subsptr)
/* Calls pcre_get_stringnumber() for the name "ptr" in the compiled pattern
given as the second argument, storing the result in "n". The pattern is taken
from the macro argument; previously the body referred to a variable named "re"
at the call site and silently ignored its own second parameter. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)(ptr))
/* 8-bit wrappers, second half. Each macro forwards to the pcre_xxx function
of the same name; where a result is produced it is assigned to the macro's
first argument. Cast operands are parenthesised. */

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)(bptr), offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)(bptr), offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

/* Gives the 8-bit table builder a width-suffixed alias, matching the 16- and
32-bit function names. */

#define pcre8_maketables pcre_maketables
303 #endif /* SUPPORT_PCRE8 */
305 /* -----------------------------------------------------------*/
307 #ifdef SUPPORT_PCRE16
/* 16-bit string helpers. PCHARS16 calls pchars16() on the string that starts
"offset" 16-bit units after the base pointer "p" and stores the result in
"lv"; PCHARSV16 makes the same call and discards the result. The base is cast
to PCRE_SPTR16 before the offset is added, so the offset counts 16-bit units;
the offset is parenthesised so it is added as a whole. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* Calls read_capture_name16() with the 16-bit name buffer (cn16) and stores
the returned pointer back in "p". */

#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

/* Result of strlen16() on the string, as an int. */

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)(p)))
/* 16-bit wrappers, first half. Each macro forwards to the pcre16_xxx function
or variable of the same name, casting the 8-bit-typed arguments used by the
generic code to their 16-bit counterparts; results are cast back and assigned
to the macro's first argument. Buffer sizes given in bytes are halved to give
a count of 16-bit units. Every cast or arithmetic operand is parenthesised so
that an argument written as an expression is treated as a whole. */

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))(callout)

#define SET_PCRE_STACK_GUARD16(stack_guard) \
  pcre16_stack_guard = (int (*)(void))(stack_guard)

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)(extra), \
    (pcre16_jit_callback)(callback), userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)(pat), options, error, erroffset, \
    tables)

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)(re), (PCRE_SPTR16)(bptr), \
    offsets, count, (PCRE_SPTR16)(namesptr), (PCRE_UCHAR16 *)(cbuffer), \
    (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_UCHAR16 *)(cbuffer), (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)(re), (pcre16_extra *)(extra), \
    (PCRE_SPTR16)(bptr), len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)(re), (pcre16_extra *)(extra), \
    (PCRE_SPTR16)(bptr), len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)(extra))

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)(substring))

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)(listptr))

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)(re), (PCRE_SPTR16)(bptr), \
    offsets, count, (PCRE_SPTR16)(getnamesptr), \
    (PCRE_SPTR16 *)(void *)(subsptr))
/* Calls pcre16_get_stringnumber() for the name "ptr" in the compiled pattern
given as the second argument, storing the result in "n". The pattern is taken
from the macro argument (previously the body used a variable named "re" from
the call site) and is cast to pcre16 * like the other 16-bit wrappers do. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)(re), (PCRE_SPTR16)(ptr))
/* 16-bit wrappers, second half. Each macro forwards to the pcre16_xxx
function of the same name with its arguments cast to the 16-bit types; results
are cast back to the 8-bit-named types used by the generic code. Cast operands
are parenthesised, and PCRE_JIT_STACK_ALLOC16 is wrapped in parentheses so its
value can be used safely inside a larger expression. */

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_SPTR16 *)(void *)(subsptr))

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16 **)(void *)(listptr))

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)(re), \
    (pcre16_extra *)(extra), tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)(re), options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  ((pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize))

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)(stack))
395 #endif /* SUPPORT_PCRE16 */
397 /* -----------------------------------------------------------*/
399 #ifdef SUPPORT_PCRE32
/* 32-bit string helpers. PCHARS32 calls pchars32() on the string that starts
"offset" 32-bit units after the base pointer "p" and stores the result in
"lv"; PCHARSV32 makes the same call and discards the result. Both pass the
variable use_utf from the scope in which the macro is used. The base is cast
to PCRE_SPTR32 before the offset is added, so the offset counts 32-bit units;
the offset is parenthesised so it is added as a whole. */

#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + (offset), len, use_utf, f)

#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + (offset), len, use_utf, f)

/* Calls read_capture_name32() with the 32-bit name buffer (cn32) and stores
the returned pointer back in "p". */

#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  p = read_capture_name32(p, cn32, re)

/* Result of strlen32() on the string, as an int. */

#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)(p)))
/* 32-bit wrappers, first half. Each macro forwards to the pcre32_xxx function
or variable of the same name, casting the 8-bit-typed arguments used by the
generic code to their 32-bit counterparts; results are cast back and assigned
to the macro's first argument. Buffer sizes given in bytes are divided by four
to give a count of 32-bit units. Every cast or arithmetic operand is
parenthesised so that an argument written as an expression is treated as a
whole. */

#define SET_PCRE_CALLOUT32(callout) \
  pcre32_callout = (int (*)(pcre32_callout_block *))(callout)

#define SET_PCRE_STACK_GUARD32(stack_guard) \
  pcre32_stack_guard = (int (*)(void))(stack_guard)

#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  pcre32_assign_jit_stack((pcre32_extra *)(extra), \
    (pcre32_jit_callback)(callback), userdata)

#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre32_compile((PCRE_SPTR32)(pat), options, error, erroffset, \
    tables)

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)(re), (PCRE_SPTR32)(bptr), \
    offsets, count, (PCRE_SPTR32)(namesptr), (PCRE_UCHAR32 *)(cbuffer), \
    (size)/4)

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)(bptr), offsets, count, i, \
    (PCRE_UCHAR32 *)(cbuffer), (size)/4)

#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre32_dfa_exec((pcre32 *)(re), (pcre32_extra *)(extra), \
    (PCRE_SPTR32)(bptr), len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre32_exec((pcre32 *)(re), (pcre32_extra *)(extra), \
    (PCRE_SPTR32)(bptr), len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY32(extra) \
  pcre32_free_study((pcre32_extra *)(extra))

#define PCRE_FREE_SUBSTRING32(substring) \
  pcre32_free_substring((PCRE_SPTR32)(substring))

#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  pcre32_free_substring_list((PCRE_SPTR32 *)(listptr))

#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre32_get_named_substring((pcre32 *)(re), (PCRE_SPTR32)(bptr), \
    offsets, count, (PCRE_SPTR32)(getnamesptr), \
    (PCRE_SPTR32 *)(void *)(subsptr))
/* Calls pcre32_get_stringnumber() for the name "ptr" in the compiled pattern
given as the second argument, storing the result in "n". The pattern is taken
from the macro argument (previously the body used a variable named "re" from
the call site) and is cast to pcre32 * like the other 32-bit wrappers do. */

#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)(re), (PCRE_SPTR32)(ptr))
/* 32-bit wrappers, second half. Each macro forwards to the pcre32_xxx
function of the same name with its arguments cast to the 32-bit types; results
are cast back to the 8-bit-named types used by the generic code. Cast operands
are parenthesised, and PCRE_JIT_STACK_ALLOC32 is wrapped in parentheses so its
value can be used safely inside a larger expression. */

#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre32_get_substring((PCRE_SPTR32)(bptr), offsets, count, i, \
    (PCRE_SPTR32 *)(void *)(subsptr))

#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  rc = pcre32_get_substring_list((PCRE_SPTR32)(bptr), offsets, count, \
    (PCRE_SPTR32 **)(void *)(listptr))

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  rc = pcre32_pattern_to_host_byte_order((pcre32 *)(re), \
    (pcre32_extra *)(extra), tables)

#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  pcre32_printint(re, outfile, debug_lengths)

#define PCRE_STUDY32(extra, re, options, error) \
  extra = (pcre_extra *)pcre32_study((pcre32 *)(re), options, error)

#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  ((pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize))

#define PCRE_JIT_STACK_FREE32(stack) \
  pcre32_jit_stack_free((pcre32_jit_stack *)(stack))
487 #endif /* SUPPORT_PCRE32 */
490 /* ----- More than one mode is supported; a runtime test is needed, except for
491 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 available version is called. ----- */
/* The library width selected at run time. The values are 0, 1 and 2 in this
order, which is what a shift such as 1U << mode needs in order to yield the
character size in bytes (1, 2 and 4). The enumeration is now properly closed;
its terminating brace and semicolon were missing. */

enum {
  PCRE8_MODE,
  PCRE16_MODE,
  PCRE32_MODE
};
500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501 defined (SUPPORT_PCRE32)) >= 2
/* Size in bytes of one character in the current mode, computed by shifting 1
left by pcre_mode (so mode values 0, 1, 2 give 1, 2, 4). */

#define CHAR_SIZE (1U << pcre_mode)
505 /* There doesn't seem to be an easy way of writing these macros that can cope
506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507 cases separately. */
509 /* ----- All three modes supported ----- */
511 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
/* Run-time dispatch when all three widths are supported. Each macro tests
pcre_mode and forwards its arguments to the 32-, 16- or 8-bit variant (8-bit
is the fallback for any other value). The statement-like macros are wrapped in
do { } while (0) so that each use, followed by a semicolon, forms exactly one
statement wherever a statement is allowed. */

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCHARS32(lv, p, offset, len, f); \
    else if (pcre_mode == PCRE16_MODE) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCHARSV32(p, offset, len, f); \
    else if (pcre_mode == PCRE16_MODE) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
    else if (pcre_mode == PCRE16_MODE) \
      READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      SET_PCRE_CALLOUT32(callout); \
    else if (pcre_mode == PCRE16_MODE) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define SET_PCRE_STACK_GUARD(stack_guard) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      SET_PCRE_STACK_GUARD32(stack_guard); \
    else if (pcre_mode == PCRE16_MODE) \
      SET_PCRE_STACK_GUARD16(stack_guard); \
    else \
      SET_PCRE_STACK_GUARD8(stack_guard); \
  } while (0)

/* STRLEN yields a value, so it stays a conditional expression. */

#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? STRLEN32(p) : \
   pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
/* Run-time dispatch (all three widths supported) for JIT stack assignment,
compiling, substring copying and matching. Each macro tests pcre_mode and
forwards its arguments to the 32-, 16- or 8-bit variant, with 8-bit as the
fallback. The statement-like macros are wrapped in do { } while (0) so that
each use, followed by a semicolon, forms exactly one statement. */

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

/* No run-time test is made here: the 8-bit pcre_config() is always used. */

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)
/* Run-time dispatch (all three widths supported) for the free and substring
extraction calls. Each macro tests pcre_mode and forwards its arguments to the
32-, 16- or 8-bit variant, with 8-bit as the fallback. All are wrapped in
do { } while (0) so that each use, followed by a semicolon, forms exactly one
statement. */

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_FREE_STUDY32(extra); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_FREE_SUBSTRING32(substring); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_FREE_SUBSTRING_LIST32(listptr); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)
/* Run-time dispatch (all three widths supported) for the JIT stack, table
building, byte-order conversion, pattern printing and study calls. Each macro
tests pcre_mode and forwards to the 32-, 16- or 8-bit variant, with 8-bit as
the fallback. PCRE_JIT_STACK_ALLOC and PCRE_MAKETABLES yield values and are
parenthesised conditional expressions; the others are statements wrapped in
do { } while (0) so that each use, followed by a semicolon, forms exactly one
statement. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE32_MODE ? \
     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
   : pcre_mode == PCRE16_MODE ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
   : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_JIT_STACK_FREE32(stack); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : \
   pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_PRINTINT32(re, outfile, debug_lengths); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      PCRE_STUDY32(extra, re, options, error); \
    else if (pcre_mode == PCRE16_MODE) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)
720 /* ----- Two out of three modes are supported ----- */
722 #else
724 /* We can use some macro trickery to make a single set of definitions work in
725 the three different cases. */
/* Select the two supported widths. BITONE is the wider of the pair and
BITTWO the narrower; the two-mode dispatch macros test for BITONE and fall
back to BITTWO. */

/* ----- 32-bit and 16-bit but not 8-bit supported ----- */

#if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
#define BITONE 32
#define BITTWO 16

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */

#elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
#define BITONE 32
#define BITTWO 8

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */

#else
#define BITONE 16
#define BITTWO 8
#endif

/* Token pasting in two steps: G() expands its arguments first (so BITONE and
BITTWO become their numbers) and then glue() joins the results. */

#define glue(a,b) a##b
#define G(a,b) glue(a,b)
750 /* ----- Common macros for two-mode cases ----- */
/* Run-time dispatch when exactly two widths are supported. Each macro tests
whether pcre_mode is the BITONE mode and forwards to the BITONE variant,
otherwise to the BITTWO variant; the variant names are built with G(). The
statement-like macros are wrapped in do { } while (0) so that each use,
followed by a semicolon, forms exactly one statement. */

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCHARS,BITONE)(lv, p, offset, len, f); \
    else \
      G(PCHARS,BITTWO)(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCHARSV,BITONE)(p, offset, len, f); \
    else \
      G(PCHARSV,BITTWO)(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
    else \
      G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(SET_PCRE_CALLOUT,BITONE)(callout); \
    else \
      G(SET_PCRE_CALLOUT,BITTWO)(callout); \
  } while (0)

#define SET_PCRE_STACK_GUARD(stack_guard) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
    else \
      G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard); \
  } while (0)

/* STRLEN yields a value, so it stays a conditional expression. */

#define STRLEN(p) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
/* Two-mode run-time dispatch for JIT stack assignment, compiling, substring
copying and matching. Each macro forwards to the BITONE variant when pcre_mode
is the BITONE mode, otherwise to the BITTWO variant. The statement-like macros
are wrapped in do { } while (0) so that each use, followed by a semicolon,
forms exactly one statement. */

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
    else \
      G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
    else \
      G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables); \
  } while (0)

/* No run-time test is made here: the BITONE library's config function
(pcre16_config or pcre32_config) is always used. */

#define PCRE_CONFIG G(G(pcre,BITONE),_config)

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, \
        size); \
    else \
      G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, \
        size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, \
        options, offsets, size_offsets, workspace, size_workspace); \
    else \
      G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, \
        options, offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, \
        options, offsets, size_offsets); \
    else \
      G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, \
        options, offsets, size_offsets); \
  } while (0)
/* Two-mode run-time dispatch for the free and substring extraction calls.
Each macro forwards to the BITONE variant when pcre_mode is the BITONE mode,
otherwise to the BITTWO variant. All are wrapped in do { } while (0) so that
each use, followed by a semicolon, forms exactly one statement. */

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_STUDY,BITONE)(extra); \
    else \
      G(PCRE_FREE_STUDY,BITTWO)(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
    else \
      G(PCRE_FREE_SUBSTRING,BITTWO)(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
    else \
      G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
    else \
      G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
    else \
      G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr); \
  } while (0)
/* Expression macro: yields the result of the BITONE JIT stack allocator when
pcre_mode equals the BITONE mode constant, and of the BITTWO allocator
otherwise. The whole conditional is parenthesized so that the macro can be
used inside a larger expression without the surrounding operators binding to
the condition of the ?: operator. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
/* Statement macro: releases a JIT stack through the BITONE variant when
pcre_mode equals the BITONE mode constant, and through the BITTWO variant
otherwise. Expands to one do/while(0) statement; the caller supplies the
terminating semicolon. */

#define PCRE_JIT_STACK_FREE(stack) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
    else \
      G(PCRE_JIT_STACK_FREE,BITTWO)(stack); \
    } \
  while (0)
/* Expression macro: calls the BITONE maketables function when pcre_mode equals
the BITONE mode constant, and the BITTWO one otherwise. The whole conditional
is parenthesized so that the macro can be used inside a larger expression
without the surrounding operators binding to the condition of the ?:
operator. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
/* Two-mode dispatch for byte-order conversion, debug printing and studying.
Each macro forwards its arguments, unchanged, to the BITONE variant when
pcre_mode equals the BITONE mode constant and to the BITTWO variant otherwise.
Every expansion is a single do/while(0) statement that is completed by the
caller's semicolon. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
    else \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables); \
    } \
  while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
    else \
      G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths); \
    } \
  while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_STUDY,BITONE)(extra, re, options, error); \
    else \
      G(PCRE_STUDY,BITTWO)(extra, re, options, error); \
    } \
  while (0)
910 #endif /* Two out of three modes */
912 /* ----- End of cases where more than one mode is supported ----- */
915 /* ----- Only 8-bit mode is supported ----- */
917 #elif defined SUPPORT_PCRE8
918 #define CHAR_SIZE 1
919 #define PCHARS PCHARS8
920 #define PCHARSV PCHARSV8
921 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
922 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
923 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
924 #define STRLEN STRLEN8
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
926 #define PCRE_COMPILE PCRE_COMPILE8
927 #define PCRE_CONFIG pcre_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
931 #define PCRE_EXEC PCRE_EXEC8
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
941 #define PCRE_MAKETABLES pcre_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
943 #define PCRE_PRINTINT PCRE_PRINTINT8
944 #define PCRE_STUDY PCRE_STUDY8
946 /* ----- Only 16-bit mode is supported ----- */
948 #elif defined SUPPORT_PCRE16
949 #define CHAR_SIZE 2
950 #define PCHARS PCHARS16
951 #define PCHARSV PCHARSV16
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
954 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
955 #define STRLEN STRLEN16
956 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
957 #define PCRE_COMPILE PCRE_COMPILE16
958 #define PCRE_CONFIG pcre16_config
959 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
960 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
961 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
962 #define PCRE_EXEC PCRE_EXEC16
963 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
964 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
965 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
966 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
967 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
968 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
969 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
970 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
971 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
972 #define PCRE_MAKETABLES pcre16_maketables()
973 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
974 #define PCRE_PRINTINT PCRE_PRINTINT16
975 #define PCRE_STUDY PCRE_STUDY16
977 /* ----- Only 32-bit mode is supported ----- */
979 #elif defined SUPPORT_PCRE32
980 #define CHAR_SIZE 4
981 #define PCHARS PCHARS32
982 #define PCHARSV PCHARSV32
983 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
984 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
985 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
986 #define STRLEN STRLEN32
987 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
988 #define PCRE_COMPILE PCRE_COMPILE32
989 #define PCRE_CONFIG pcre32_config
990 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
991 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
992 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
993 #define PCRE_EXEC PCRE_EXEC32
994 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
995 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
996 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
997 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
998 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
999 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1000 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1001 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1002 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
1003 #define PCRE_MAKETABLES pcre32_maketables()
1004 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1005 #define PCRE_PRINTINT PCRE_PRINTINT32
1006 #define PCRE_STUDY PCRE_STUDY32
1008 #endif
1010 /* ----- End of mode-specific function call macros ----- */
1013 /* Other parameters */
1015 #ifndef CLOCKS_PER_SEC
1016 #ifdef CLK_TCK
1017 #define CLOCKS_PER_SEC CLK_TCK
1018 #else
1019 #define CLOCKS_PER_SEC 100
1020 #endif
1021 #endif
1023 #if !defined NODFA
1024 #define DFA_WS_DIMENSION 1000
1025 #endif
1027 /* This is the default loop count for timing. */
1029 #define LOOPREPEAT 500000
1031 /* Static variables */
1033 static FILE *outfile;
1034 static int log_store = 0;
1035 static int callout_count;
1036 static int callout_extra;
1037 static int callout_fail_count;
1038 static int callout_fail_id;
1039 static int debug_lengths;
1040 static int first_callout;
1041 static int jit_was_used;
1042 static int locale_set = 0;
1043 static int show_malloc;
1044 static int stack_guard_return;
1045 static int use_utf;
1046 static const unsigned char *last_callout_mark = NULL;
1048 /* The buffers grow automatically if very long input lines are encountered. */
1050 static int buffer_size = 50000;
1051 static pcre_uint8 *buffer = NULL;
1052 static pcre_uint8 *pbuffer = NULL;
1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1056 #ifdef COMPILE_PCRE16
1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058 #endif
1060 #ifdef COMPILE_PCRE32
1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062 #endif
1064 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066 pattern for saving/reloading testing. Luckily, the data for these tables is
1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069 LINK_SIZE is also used later in this program. */
1071 #ifdef SUPPORT_PCRE16
1072 #undef IMM2_SIZE
1073 #define IMM2_SIZE 1
1075 #if LINK_SIZE == 2
1076 #undef LINK_SIZE
1077 #define LINK_SIZE 1
1078 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1079 #undef LINK_SIZE
1080 #define LINK_SIZE 2
1081 #else
1082 #error LINK_SIZE must be either 2, 3, or 4
1083 #endif
1085 static int buffer16_size = 0;
1086 static pcre_uint16 *buffer16 = NULL;
1087 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1088 #endif /* SUPPORT_PCRE16 */
1090 #ifdef SUPPORT_PCRE32
1091 #undef IMM2_SIZE
1092 #define IMM2_SIZE 1
1093 #undef LINK_SIZE
1094 #define LINK_SIZE 1
1096 static int buffer32_size = 0;
1097 static pcre_uint32 *buffer32 = NULL;
1098 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1099 #endif /* SUPPORT_PCRE32 */
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be changed by an option. If there is no 8-bit support, there
must be 16- or 32-bit support, so default to 16-bit mode if it is available,
and to 32-bit mode otherwise. */
1105 #if defined SUPPORT_PCRE8
1106 static int pcre_mode = PCRE8_MODE;
1107 #elif defined SUPPORT_PCRE16
1108 static int pcre_mode = PCRE16_MODE;
1109 #elif defined SUPPORT_PCRE32
1110 static int pcre_mode = PCRE32_MODE;
1111 #endif
1113 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1115 static int jit_study_bits[] =
1117 PCRE_STUDY_JIT_COMPILE,
1118 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1119 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1120 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1121 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1122 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1123 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1124 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1127 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1128 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1130 /* Textual explanations for runtime error codes */
1132 static const char *errtexts[] = {
1133 NULL, /* 0 is no error */
1134 NULL, /* NOMATCH is handled specially */
1135 "NULL argument passed",
1136 "bad option value",
1137 "magic number missing",
1138 "unknown opcode - pattern overwritten?",
1139 "no more memory",
1140 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1141 "match limit exceeded",
1142 "callout error code",
1143 NULL, /* BADUTF8/16 is handled specially */
1144 NULL, /* BADUTF8/16 offset is handled specially */
1145 NULL, /* PARTIAL is handled specially */
1146 "not used - internal error",
1147 "internal error - pattern overwritten?",
1148 "bad count value",
1149 "item unsupported for DFA matching",
1150 "backreference condition or recursion test not supported for DFA matching",
1151 "match limit not supported for DFA matching",
1152 "workspace size exceeded in DFA matching",
1153 "too much recursion for DFA matching",
1154 "recursion limit exceeded",
1155 "not used - internal error",
1156 "invalid combination of newline options",
1157 "bad offset value",
1158 NULL, /* SHORTUTF8/16 is handled specially */
1159 "nested recursion at the same subject position",
1160 "JIT stack limit reached",
1161 "pattern compiled in wrong mode: 8-bit/16-bit error",
1162 "pattern compiled with other endianness",
1163 "invalid data in workspace for DFA restart",
1164 "bad JIT option",
1165 "bad length"
1169 /*************************************************
1170 * Alternate character tables *
1171 *************************************************/
1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174 using the default tables of the library. However, the T option can be used to
1175 select alternate sets of tables, for different kinds of testing. Note also that
1176 the L (locale) option also adjusts the tables. */
1178 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 only ASCII characters. */
1181 static const pcre_uint8 tables0[] = {
1183 /* This table is a lower casing table. */
1185 0, 1, 2, 3, 4, 5, 6, 7,
1186 8, 9, 10, 11, 12, 13, 14, 15,
1187 16, 17, 18, 19, 20, 21, 22, 23,
1188 24, 25, 26, 27, 28, 29, 30, 31,
1189 32, 33, 34, 35, 36, 37, 38, 39,
1190 40, 41, 42, 43, 44, 45, 46, 47,
1191 48, 49, 50, 51, 52, 53, 54, 55,
1192 56, 57, 58, 59, 60, 61, 62, 63,
1193 64, 97, 98, 99,100,101,102,103,
1194 104,105,106,107,108,109,110,111,
1195 112,113,114,115,116,117,118,119,
1196 120,121,122, 91, 92, 93, 94, 95,
1197 96, 97, 98, 99,100,101,102,103,
1198 104,105,106,107,108,109,110,111,
1199 112,113,114,115,116,117,118,119,
1200 120,121,122,123,124,125,126,127,
1201 128,129,130,131,132,133,134,135,
1202 136,137,138,139,140,141,142,143,
1203 144,145,146,147,148,149,150,151,
1204 152,153,154,155,156,157,158,159,
1205 160,161,162,163,164,165,166,167,
1206 168,169,170,171,172,173,174,175,
1207 176,177,178,179,180,181,182,183,
1208 184,185,186,187,188,189,190,191,
1209 192,193,194,195,196,197,198,199,
1210 200,201,202,203,204,205,206,207,
1211 208,209,210,211,212,213,214,215,
1212 216,217,218,219,220,221,222,223,
1213 224,225,226,227,228,229,230,231,
1214 232,233,234,235,236,237,238,239,
1215 240,241,242,243,244,245,246,247,
1216 248,249,250,251,252,253,254,255,
1218 /* This table is a case flipping table. */
1220 0, 1, 2, 3, 4, 5, 6, 7,
1221 8, 9, 10, 11, 12, 13, 14, 15,
1222 16, 17, 18, 19, 20, 21, 22, 23,
1223 24, 25, 26, 27, 28, 29, 30, 31,
1224 32, 33, 34, 35, 36, 37, 38, 39,
1225 40, 41, 42, 43, 44, 45, 46, 47,
1226 48, 49, 50, 51, 52, 53, 54, 55,
1227 56, 57, 58, 59, 60, 61, 62, 63,
1228 64, 97, 98, 99,100,101,102,103,
1229 104,105,106,107,108,109,110,111,
1230 112,113,114,115,116,117,118,119,
1231 120,121,122, 91, 92, 93, 94, 95,
1232 96, 65, 66, 67, 68, 69, 70, 71,
1233 72, 73, 74, 75, 76, 77, 78, 79,
1234 80, 81, 82, 83, 84, 85, 86, 87,
1235 88, 89, 90,123,124,125,126,127,
1236 128,129,130,131,132,133,134,135,
1237 136,137,138,139,140,141,142,143,
1238 144,145,146,147,148,149,150,151,
1239 152,153,154,155,156,157,158,159,
1240 160,161,162,163,164,165,166,167,
1241 168,169,170,171,172,173,174,175,
1242 176,177,178,179,180,181,182,183,
1243 184,185,186,187,188,189,190,191,
1244 192,193,194,195,196,197,198,199,
1245 200,201,202,203,204,205,206,207,
1246 208,209,210,211,212,213,214,215,
1247 216,217,218,219,220,221,222,223,
1248 224,225,226,227,228,229,230,231,
1249 232,233,234,235,236,237,238,239,
1250 240,241,242,243,244,245,246,247,
1251 248,249,250,251,252,253,254,255,
1253 /* This table contains bit maps for various character classes. Each map is 32
1254 bytes long and the bits run from the least significant end of each byte. The
1255 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1256 graph, print, punct, and cntrl. Other classes are built from combinations. */
1258 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1263 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1264 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1268 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1278 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1280 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1283 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1284 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1286 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1288 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1289 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1290 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1291 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1293 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1294 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1295 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1296 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1298 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1299 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1303 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1308 /* This table identifies various classes of character by individual bits:
1309 0x01 white space character
1310 0x02 letter
1311 0x04 decimal digit
1312 0x08 hexadecimal digit
1313 0x10 alphanumeric or '_'
1314 0x80 regular expression metacharacter or binary zero
1317 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1318 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1321 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1322 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1323 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1324 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1325 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1326 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1327 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1328 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1329 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1330 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1331 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1332 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1350 /* This is a set of tables that came originally from a Windows user. It seems
1351 to be at least an approximation of ISO 8859. In particular, there are
1352 characters greater than 128 that are marked as spaces, letters, etc. */
1354 static const pcre_uint8 tables1[] = {
1355 0,1,2,3,4,5,6,7,
1356 8,9,10,11,12,13,14,15,
1357 16,17,18,19,20,21,22,23,
1358 24,25,26,27,28,29,30,31,
1359 32,33,34,35,36,37,38,39,
1360 40,41,42,43,44,45,46,47,
1361 48,49,50,51,52,53,54,55,
1362 56,57,58,59,60,61,62,63,
1363 64,97,98,99,100,101,102,103,
1364 104,105,106,107,108,109,110,111,
1365 112,113,114,115,116,117,118,119,
1366 120,121,122,91,92,93,94,95,
1367 96,97,98,99,100,101,102,103,
1368 104,105,106,107,108,109,110,111,
1369 112,113,114,115,116,117,118,119,
1370 120,121,122,123,124,125,126,127,
1371 128,129,130,131,132,133,134,135,
1372 136,137,138,139,140,141,142,143,
1373 144,145,146,147,148,149,150,151,
1374 152,153,154,155,156,157,158,159,
1375 160,161,162,163,164,165,166,167,
1376 168,169,170,171,172,173,174,175,
1377 176,177,178,179,180,181,182,183,
1378 184,185,186,187,188,189,190,191,
1379 224,225,226,227,228,229,230,231,
1380 232,233,234,235,236,237,238,239,
1381 240,241,242,243,244,245,246,215,
1382 248,249,250,251,252,253,254,223,
1383 224,225,226,227,228,229,230,231,
1384 232,233,234,235,236,237,238,239,
1385 240,241,242,243,244,245,246,247,
1386 248,249,250,251,252,253,254,255,
1387 0,1,2,3,4,5,6,7,
1388 8,9,10,11,12,13,14,15,
1389 16,17,18,19,20,21,22,23,
1390 24,25,26,27,28,29,30,31,
1391 32,33,34,35,36,37,38,39,
1392 40,41,42,43,44,45,46,47,
1393 48,49,50,51,52,53,54,55,
1394 56,57,58,59,60,61,62,63,
1395 64,97,98,99,100,101,102,103,
1396 104,105,106,107,108,109,110,111,
1397 112,113,114,115,116,117,118,119,
1398 120,121,122,91,92,93,94,95,
1399 96,65,66,67,68,69,70,71,
1400 72,73,74,75,76,77,78,79,
1401 80,81,82,83,84,85,86,87,
1402 88,89,90,123,124,125,126,127,
1403 128,129,130,131,132,133,134,135,
1404 136,137,138,139,140,141,142,143,
1405 144,145,146,147,148,149,150,151,
1406 152,153,154,155,156,157,158,159,
1407 160,161,162,163,164,165,166,167,
1408 168,169,170,171,172,173,174,175,
1409 176,177,178,179,180,181,182,183,
1410 184,185,186,187,188,189,190,191,
1411 224,225,226,227,228,229,230,231,
1412 232,233,234,235,236,237,238,239,
1413 240,241,242,243,244,245,246,215,
1414 248,249,250,251,252,253,254,223,
1415 192,193,194,195,196,197,198,199,
1416 200,201,202,203,204,205,206,207,
1417 208,209,210,211,212,213,214,247,
1418 216,217,218,219,220,221,222,255,
1419 0,62,0,0,1,0,0,0,
1420 0,0,0,0,0,0,0,0,
1421 32,0,0,0,1,0,0,0,
1422 0,0,0,0,0,0,0,0,
1423 0,0,0,0,0,0,255,3,
1424 126,0,0,0,126,0,0,0,
1425 0,0,0,0,0,0,0,0,
1426 0,0,0,0,0,0,0,0,
1427 0,0,0,0,0,0,255,3,
1428 0,0,0,0,0,0,0,0,
1429 0,0,0,0,0,0,12,2,
1430 0,0,0,0,0,0,0,0,
1431 0,0,0,0,0,0,0,0,
1432 254,255,255,7,0,0,0,0,
1433 0,0,0,0,0,0,0,0,
1434 255,255,127,127,0,0,0,0,
1435 0,0,0,0,0,0,0,0,
1436 0,0,0,0,254,255,255,7,
1437 0,0,0,0,0,4,32,4,
1438 0,0,0,128,255,255,127,255,
1439 0,0,0,0,0,0,255,3,
1440 254,255,255,135,254,255,255,7,
1441 0,0,0,0,0,4,44,6,
1442 255,255,127,255,255,255,127,255,
1443 0,0,0,0,254,255,255,255,
1444 255,255,255,255,255,255,255,127,
1445 0,0,0,0,254,255,255,255,
1446 255,255,255,255,255,255,255,255,
1447 0,2,0,0,255,255,255,255,
1448 255,255,255,255,255,255,255,127,
1449 0,0,0,0,255,255,255,255,
1450 255,255,255,255,255,255,255,255,
1451 0,0,0,0,254,255,0,252,
1452 1,0,0,248,1,0,0,120,
1453 0,0,0,0,254,255,255,255,
1454 0,0,128,0,0,0,128,0,
1455 255,255,255,255,0,0,0,0,
1456 0,0,0,0,0,0,0,128,
1457 255,255,255,255,0,0,0,0,
1458 0,0,0,0,0,0,0,0,
1459 128,0,0,0,0,0,0,0,
1460 0,1,1,0,1,1,0,0,
1461 0,0,0,0,0,0,0,0,
1462 0,0,0,0,0,0,0,0,
1463 1,0,0,0,128,0,0,0,
1464 128,128,128,128,0,0,128,0,
1465 28,28,28,28,28,28,28,28,
1466 28,28,0,0,0,0,0,128,
1467 0,26,26,26,26,26,26,18,
1468 18,18,18,18,18,18,18,18,
1469 18,18,18,18,18,18,18,18,
1470 18,18,18,128,128,0,128,16,
1471 0,26,26,26,26,26,26,18,
1472 18,18,18,18,18,18,18,18,
1473 18,18,18,18,18,18,18,18,
1474 18,18,18,128,128,0,0,0,
1475 0,0,0,0,0,1,0,0,
1476 0,0,0,0,0,0,0,0,
1477 0,0,0,0,0,0,0,0,
1478 0,0,0,0,0,0,0,0,
1479 1,0,0,0,0,0,0,0,
1480 0,0,18,0,0,0,0,0,
1481 0,0,20,20,0,18,0,0,
1482 0,20,18,0,0,0,0,0,
1483 18,18,18,18,18,18,18,18,
1484 18,18,18,18,18,18,18,18,
1485 18,18,18,18,18,18,18,0,
1486 18,18,18,18,18,18,18,18,
1487 18,18,18,18,18,18,18,18,
1488 18,18,18,18,18,18,18,18,
1489 18,18,18,18,18,18,18,0,
1490 18,18,18,18,18,18,18,18
1496 #ifndef HAVE_STRERROR
1497 /*************************************************
1498 * Provide strerror() for non-ANSI libraries *
1499 *************************************************/
1501 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1502 in their libraries, but can provide the same facility by this simple
1503 alternative function. */
1505 extern int sys_nerr;
1506 extern char *sys_errlist[];
1508 char *
1509 strerror(int n)
1511 if (n < 0 || n >= sys_nerr) return "unknown error number";
1512 return sys_errlist[n];
1514 #endif /* HAVE_STRERROR */
1518 /*************************************************
1519 * Print newline configuration *
1520 *************************************************/
1523 Arguments:
1524 rc the return code from PCRE_CONFIG_NEWLINE
1525 isc TRUE if called from "-C newline"
1526 Returns: nothing
1529 static void
1530 print_newline_config(int rc, BOOL isc)
1532 const char *s = NULL;
1533 if (!isc) printf(" Newline sequence is ");
1534 switch(rc)
1536 case CHAR_CR: s = "CR"; break;
1537 case CHAR_LF: s = "LF"; break;
1538 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539 case -1: s = "ANY"; break;
1540 case -2: s = "ANYCRLF"; break;
1542 default:
1543 printf("a non-standard value: 0x%04x\n", rc);
1544 return;
1547 printf("%s\n", s);
1552 /*************************************************
1553 * JIT memory callback *
1554 *************************************************/
1556 static pcre_jit_stack* jit_callback(void *arg)
1558 jit_was_used = TRUE;
1559 return (pcre_jit_stack *)arg;
1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 /*************************************************
1565 * Convert UTF-8 string to value *
1566 *************************************************/
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1569 and returns the value of the character.
1571 Argument:
1572 utf8bytes a pointer to the byte vector
1573 vptr a pointer to an int to receive the value
1575 Returns: > 0 => the number of bytes consumed
1576 -6 to 0 => malformed UTF-8 character at offset = (-return)
1579 static int
1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1582 pcre_uint32 c = *utf8bytes++;
1583 pcre_uint32 d = c;
1584 int i, j, s;
1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1588 if ((d & 0x80) == 0) break;
1589 d <<= 1;
1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1595 /* i now has a value in the range 1-5 */
1597 s = 6*i;
1598 d = (c & utf8_table3[i]) << s;
1600 for (j = 0; j < i; j++)
1602 c = *utf8bytes++;
1603 if ((c & 0xc0) != 0x80) return -(j+1);
1604 s -= 6;
1605 d |= (c & 0x3f) << s;
1608 /* Check that encoding was the correct unique one */
1610 for (j = 0; j < utf8_table1_size; j++)
1611 if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 if (j != i) return -(i+1);
1614 /* Valid value */
1616 *vptr = d;
1617 return i+1;
#endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1623 #if defined SUPPORT_PCRE8 && !defined NOUTF
1624 /*************************************************
1625 * Convert character value to UTF-8 *
1626 *************************************************/
1628 /* This function takes an integer value in the range 0 - 0x7fffffff
1629 and encodes it as a UTF-8 character in 0 to 6 bytes.
1631 Arguments:
1632 cvalue the character value
1633 utf8bytes pointer to buffer for result - at least 6 bytes long
1635 Returns: number of characters placed in the buffer
1638 static int
1639 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1641 register int i, j;
1642 if (cvalue > 0x7fffffffu)
1643 return -1;
1644 for (i = 0; i < utf8_table1_size; i++)
1645 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1646 utf8bytes += i;
1647 for (j = i; j > 0; j--)
1649 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1650 cvalue >>= 6;
1652 *utf8bytes = utf8_table2[i] | cvalue;
1653 return i + 1;
1655 #endif
1658 #ifdef SUPPORT_PCRE16
1659 /*************************************************
1660 * Convert a string to 16-bit *
1661 *************************************************/
1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1666 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1667 result is always left in buffer16.
1669 Note that this function does not object to surrogate values. This is
1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1671 for the purpose of testing that they are correctly faulted.
1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 in UTF-8 so that values greater than 255 can be handled.
1676 Arguments:
1677 data TRUE if converting a data line; FALSE for a regex
1678 p points to a byte string
1679 utf true if UTF-8 (to be converted to UTF-16)
1680 len number of bytes in the string (excluding trailing zero)
1682 Returns: number of 16-bit data items used (excluding trailing zero)
1683 OR -1 if a UTF-8 string is malformed
1684 OR -2 if a value > 0x10ffff is encountered
1685 OR -3 if a value > 0xffff is encountered when not in UTF mode
1688 static int
1689 to16(int data, pcre_uint8 *p, int utf, int len)
1691 pcre_uint16 *pp;
1693 if (buffer16_size < 2*len + 2)
1695 if (buffer16 != NULL) free(buffer16);
1696 buffer16_size = 2*len + 2;
1697 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1698 if (buffer16 == NULL)
1700 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1701 exit(1);
1705 pp = buffer16;
1707 if (!utf && !data)
1709 while (len-- > 0) *pp++ = *p++;
1712 else
1714 pcre_uint32 c = 0;
1715 while (len > 0)
1717 int chlen = utf82ord(p, &c);
1718 if (chlen <= 0) return -1;
1719 if (c > 0x10ffff) return -2;
1720 p += chlen;
1721 len -= chlen;
1722 if (c < 0x10000) *pp++ = c; else
1724 if (!utf) return -3;
1725 c -= 0x10000;
1726 *pp++ = 0xD800 | (c >> 10);
1727 *pp++ = 0xDC00 | (c & 0x3ff);
1732 *pp = 0;
1733 return pp - buffer16;
1735 #endif
1737 #ifdef SUPPORT_PCRE32
1738 /*************************************************
1739 * Convert a string to 32-bit *
1740 *************************************************/
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1745 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1746 result is always left in buffer32.
1748 Note that this function does not object to surrogate values. This is
1749 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1750 for the purpose of testing that they are correctly faulted.
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1755 Arguments:
1756 data TRUE if converting a data line; FALSE for a regex
1757 p points to a byte string
1758 utf true if UTF-8 (to be converted to UTF-32)
1759 len number of bytes in the string (excluding trailing zero)
1761 Returns: number of 32-bit data items used (excluding trailing zero)
1762 OR -1 if a UTF-8 string is malformed
1763 OR -2 if a value > 0x10ffff is encountered
1764 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1767 static int
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1770 pcre_uint32 *pp;
1772 if (buffer32_size < 4*len + 4)
1774 if (buffer32 != NULL) free(buffer32);
1775 buffer32_size = 4*len + 4;
1776 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777 if (buffer32 == NULL)
1779 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780 exit(1);
1784 pp = buffer32;
1786 if (!utf && !data)
1788 while (len-- > 0) *pp++ = *p++;
1791 else
1793 pcre_uint32 c = 0;
1794 while (len > 0)
1796 int chlen = utf82ord(p, &c);
1797 if (chlen <= 0) return -1;
1798 if (utf)
1800 if (c > 0x10ffff) return -2;
1801 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1804 p += chlen;
1805 len -= chlen;
1806 *pp++ = c;
1810 *pp = 0;
1811 return pp - buffer32;
/* Check that a 32-bit character string is valid UTF-32.

A code unit is invalid if it is greater than 0x10ffff or if it lies in the
surrogate range 0xd800-0xdfff.

Arguments:
  string       points to the string
  length       length of string, or -1 if the string is zero-terminated

Returns:       TRUE  if the string is a valid UTF-32 string
               FALSE otherwise
*/

#ifdef NEVER   /* Not used */
#ifdef SUPPORT_UTF
static BOOL
valid_utf32(pcre_uint32 *string, int length)
{
  pcre_uint32 *p;
  pcre_uint32 c;

  /* A negative length means "zero-terminated": measure the string first.
  Without this step the loop below would run zero times and every
  zero-terminated string would be reported as valid without being checked. */

  if (length < 0) {
    for (p = string; *p != 0; p++) { }
    length = (int)(p - string);
  }

  for (p = string; length-- > 0; p++) {
    c = *p;
    if (c > 0x10ffffu) return FALSE;                      /* Too big */
    if ((c & 0xfffff800u) == 0xd800u) return FALSE;       /* Surrogate */
  }

  return TRUE;
}
#endif /* SUPPORT_UTF */
#endif /* NEVER */
1843 #endif /* SUPPORT_PCRE32 */
1846 /*************************************************
1847 * Read or extend an input line *
1848 *************************************************/
1850 /* Input lines are read into buffer, but both patterns and data lines can be
1851 continued over multiple input lines. In addition, if the buffer fills up, we
1852 want to automatically expand it so as to be able to handle extremely large
1853 lines that are needed for certain stress tests. When the input buffer is
1854 expanded, the other two buffers must also be expanded likewise, and the
1855 contents of pbuffer, which are a copy of the input for callouts, must be
1856 preserved (for when expansion happens for a data line). This is not the most
1857 optimal way of handling this, but hey, this is just a test program!
1859 Arguments:
1860 f the file to read
1861 start where in buffer to start (this *must* be within buffer)
1862 prompt for stdin or readline()
1864 Returns: pointer to the start of new data
1865 could be a copy of start, or could be moved
1866 NULL if no data read and EOF reached
1869 static pcre_uint8 *
1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1872 pcre_uint8 *here = start;
1874 for (;;)
1876 size_t rlen = (size_t)(buffer_size - (here - buffer));
1878 if (rlen > 1000)
1880 int dlen;
1882 /* If libreadline or libedit support is required, use readline() to read a
1883 line if the input is a terminal. Note that readline() removes the trailing
1884 newline, so we must put it back again, to be compatible with fgets(). */
1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887 if (isatty(fileno(f)))
1889 size_t len;
1890 char *s = readline(prompt);
1891 if (s == NULL) return (here == start)? NULL : start;
1892 len = strlen(s);
1893 if (len > 0) add_history(s);
1894 if (len > rlen - 1) len = rlen - 1;
1895 memcpy(here, s, len);
1896 here[len] = '\n';
1897 here[len+1] = 0;
1898 free(s);
1900 else
1901 #endif
1903 /* Read the next line by normal means, prompting if the file is stdin. */
1906 if (f == stdin) printf("%s", prompt);
1907 if (fgets((char *)here, rlen, f) == NULL)
1908 return (here == start)? NULL : start;
1911 dlen = (int)strlen((char *)here);
1912 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913 here += dlen;
1916 else
1918 int new_buffer_size = 2*buffer_size;
1919 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1922 if (new_buffer == NULL || new_pbuffer == NULL)
1924 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925 exit(1);
1928 memcpy(new_buffer, buffer, buffer_size);
1929 memcpy(new_pbuffer, pbuffer, buffer_size);
1931 buffer_size = new_buffer_size;
1933 start = new_buffer + (start - buffer);
1934 here = new_buffer + (here - buffer);
1936 free(buffer);
1937 free(pbuffer);
1939 buffer = new_buffer;
1940 pbuffer = new_pbuffer;
1944 /* Control never gets here */
1949 /*************************************************
1950 * Read number from string *
1951 *************************************************/
1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954 around with conditional compilation, just do the job by hand. It is only used
1955 for unpicking arguments, so just keep it simple.
1957 Arguments:
1958 str string to be converted
1959 endptr where to put the end pointer
1961 Returns: the unsigned long
1964 static int
1965 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1967 int result = 0;
1968 while(*str != 0 && isspace(*str)) str++;
1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970 *endptr = str;
1971 return(result);
1976 /*************************************************
1977 * Print one character *
1978 *************************************************/
1980 /* Print a single character either literally, or as a hex escape. */
1982 static int pchar(pcre_uint32 c, FILE *f)
1984 int n = 0;
1985 char tempbuffer[16];
1986 if (PRINTOK(c))
1988 if (f != NULL) fprintf(f, "%c", c);
1989 return 1;
1992 if (c < 0x100)
1994 if (use_utf)
1996 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1997 return 6;
1999 else
2001 if (f != NULL) fprintf(f, "\\x%02x", c);
2002 return 4;
2006 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2007 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2009 return n >= 0 ? n : 0;
2014 #ifdef SUPPORT_PCRE8
2015 /*************************************************
2016 * Print 8-bit character string *
2017 *************************************************/
2019 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2020 If handed a NULL file, just counts chars without printing. */
2022 static int pchars(pcre_uint8 *p, int length, FILE *f)
2024 pcre_uint32 c = 0;
2025 int yield = 0;
2027 if (length < 0)
2028 length = strlen((char *)p);
2030 while (length-- > 0)
2032 #if !defined NOUTF
2033 if (use_utf)
2035 int rc = utf82ord(p, &c);
2036 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2038 length -= rc - 1;
2039 p += rc;
2040 yield += pchar(c, f);
2041 continue;
2044 #endif
2045 c = *p++;
2046 yield += pchar(c, f);
2049 return yield;
2051 #endif
2055 #ifdef SUPPORT_PCRE16
2056 /*************************************************
2057 * Find length of 0-terminated 16-bit string *
2058 *************************************************/
2060 static int strlen16(PCRE_SPTR16 p)
2062 PCRE_SPTR16 pp = p;
2063 while (*pp != 0) pp++;
2064 return (int)(pp - p);
2066 #endif /* SUPPORT_PCRE16 */
2070 #ifdef SUPPORT_PCRE32
2071 /*************************************************
2072 * Find length of 0-terminated 32-bit string *
2073 *************************************************/
2075 static int strlen32(PCRE_SPTR32 p)
2077 PCRE_SPTR32 pp = p;
2078 while (*pp != 0) pp++;
2079 return (int)(pp - p);
2081 #endif /* SUPPORT_PCRE32 */
2085 #ifdef SUPPORT_PCRE16
2086 /*************************************************
2087 * Print 16-bit character string *
2088 *************************************************/
2090 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2091 If handed a NULL file, just counts chars without printing. */
2093 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2095 int yield = 0;
2097 if (length < 0)
2098 length = strlen16(p);
2100 while (length-- > 0)
2102 pcre_uint32 c = *p++ & 0xffff;
2103 #if !defined NOUTF
2104 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2106 int d = *p & 0xffff;
2107 if (d >= 0xDC00 && d <= 0xDFFF)
2109 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2110 length--;
2111 p++;
2114 #endif
2115 yield += pchar(c, f);
2118 return yield;
2120 #endif /* SUPPORT_PCRE16 */
2124 #ifdef SUPPORT_PCRE32
2125 /*************************************************
2126 * Print 32-bit character string *
2127 *************************************************/
2129 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2130 If handed a NULL file, just counts chars without printing. */
2132 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2134 int yield = 0;
2136 (void)(utf); /* Avoid compiler warning */
2138 if (length < 0)
2139 length = strlen32(p);
2141 while (length-- > 0)
2143 pcre_uint32 c = *p++;
2144 yield += pchar(c, f);
2147 return yield;
2149 #endif /* SUPPORT_PCRE32 */
2153 #ifdef SUPPORT_PCRE8
2154 /*************************************************
2155 * Read a capture name (8-bit) and check it *
2156 *************************************************/
2158 static pcre_uint8 *
2159 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2161 pcre_uint8 *npp = *pp;
2162 while (isalnum(*p)) *npp++ = *p++;
2163 *npp++ = 0;
2164 *npp = 0;
2165 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2167 fprintf(outfile, "no parentheses with name \"");
2168 PCHARSV(*pp, 0, -1, outfile);
2169 fprintf(outfile, "\"\n");
2172 *pp = npp;
2173 return p;
2175 #endif /* SUPPORT_PCRE8 */
2179 #ifdef SUPPORT_PCRE16
2180 /*************************************************
2181 * Read a capture name (16-bit) and check it *
2182 *************************************************/
2184 /* Note that the text being read is 8-bit. */
2186 static pcre_uint8 *
2187 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2189 pcre_uint16 *npp = *pp;
2190 while (isalnum(*p)) *npp++ = *p++;
2191 *npp++ = 0;
2192 *npp = 0;
2193 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2195 fprintf(outfile, "no parentheses with name \"");
2196 PCHARSV(*pp, 0, -1, outfile);
2197 fprintf(outfile, "\"\n");
2199 *pp = npp;
2200 return p;
2202 #endif /* SUPPORT_PCRE16 */
2206 #ifdef SUPPORT_PCRE32
2207 /*************************************************
2208 * Read a capture name (32-bit) and check it *
2209 *************************************************/
2211 /* Note that the text being read is 8-bit. */
2213 static pcre_uint8 *
2214 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2216 pcre_uint32 *npp = *pp;
2217 while (isalnum(*p)) *npp++ = *p++;
2218 *npp++ = 0;
2219 *npp = 0;
2220 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2222 fprintf(outfile, "no parentheses with name \"");
2223 PCHARSV(*pp, 0, -1, outfile);
2224 fprintf(outfile, "\"\n");
2226 *pp = npp;
2227 return p;
2229 #endif /* SUPPORT_PCRE32 */
2233 /*************************************************
2234 * Stack guard function *
2235 *************************************************/
2237 /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2238 return when a count overflows. */
2240 static int stack_guard(void)
2242 return stack_guard_return;
2245 /*************************************************
2246 * Callout function *
2247 *************************************************/
2249 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2250 the match. Yield zero unless more callouts than the fail count, or the callout
2251 data is not zero. */
2253 static int callout(pcre_callout_block *cb)
2255 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2256 int i, current_position, pre_start, post_start, subject_length;
2258 if (callout_extra)
2260 fprintf(f, "Callout %d: last capture = %d\n",
2261 cb->callout_number, cb->capture_last);
2263 if (cb->offset_vector != NULL)
2265 for (i = 0; i < cb->capture_top * 2; i += 2)
2267 if (cb->offset_vector[i] < 0)
2268 fprintf(f, "%2d: <unset>\n", i/2);
2269 else
2271 fprintf(f, "%2d: ", i/2);
2272 PCHARSV(cb->subject, cb->offset_vector[i],
2273 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2274 fprintf(f, "\n");
2280 /* Re-print the subject in canonical form, the first time or if giving full
2281 datails. On subsequent calls in the same match, we use pchars just to find the
2282 printed lengths of the substrings. */
2284 if (f != NULL) fprintf(f, "--->");
2286 /* If a lookbehind is involved, the current position may be earlier than the
2287 match start. If so, use the match start instead. */
2289 current_position = (cb->current_position >= cb->start_match)?
2290 cb->current_position : cb->start_match;
2292 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2293 PCHARS(post_start, cb->subject, cb->start_match,
2294 current_position - cb->start_match, f);
2296 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2298 PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
2300 if (f != NULL) fprintf(f, "\n");
2302 /* Always print appropriate indicators, with callout number if not already
2303 shown. For automatic callouts, show the pattern offset. */
2305 if (cb->callout_number == 255)
2307 fprintf(outfile, "%+3d ", cb->pattern_position);
2308 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2310 else
2312 if (callout_extra) fprintf(outfile, " ");
2313 else fprintf(outfile, "%3d ", cb->callout_number);
2316 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2317 fprintf(outfile, "^");
2319 if (post_start > 0)
2321 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2322 fprintf(outfile, "^");
2325 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2326 fprintf(outfile, " ");
2328 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2329 pbuffer + cb->pattern_position);
2331 fprintf(outfile, "\n");
2332 first_callout = 0;
2334 if (cb->mark != last_callout_mark)
2336 if (cb->mark == NULL)
2337 fprintf(outfile, "Latest Mark: <unset>\n");
2338 else
2340 fprintf(outfile, "Latest Mark: ");
2341 PCHARSV(cb->mark, 0, -1, outfile);
2342 putc('\n', outfile);
2344 last_callout_mark = cb->mark;
2347 if (cb->callout_data != NULL)
2349 int callout_data = *((int *)(cb->callout_data));
2350 if (callout_data != 0)
2352 fprintf(outfile, "Callout data = %d\n", callout_data);
2353 return callout_data;
2357 return (cb->callout_number != callout_fail_id)? 0 :
2358 (++callout_count >= callout_fail_count)? 1 : 0;
2362 /*************************************************
2363 * Local malloc functions *
2364 *************************************************/
2366 /* Alternative malloc function, to test functionality and save the size of a
2367 compiled re, which is the first store request that pcre_compile() makes. The
2368 show_malloc variable is set only during matching. */
2370 static void *new_malloc(size_t size)
2372 void *block = malloc(size);
2373 if (show_malloc)
2374 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2375 return block;
2378 static void new_free(void *block)
2380 if (show_malloc)
2381 fprintf(outfile, "free %p\n", block);
2382 free(block);
2385 /* For recursion malloc/free, to test stacking calls */
2387 static void *stack_malloc(size_t size)
2389 void *block = malloc(size);
2390 if (show_malloc)
2391 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2392 return block;
2395 static void stack_free(void *block)
2397 if (show_malloc)
2398 fprintf(outfile, "stack_free %p\n", block);
2399 free(block);
2403 /*************************************************
2404 * Call pcre_fullinfo() *
2405 *************************************************/
2407 /* Get one piece of information from the pcre_fullinfo() function. When only
2408 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2409 value, but the code is defensive.
2411 Arguments:
2412 re compiled regex
2413 study study data
2414 option PCRE_INFO_xxx option
2415 ptr where to put the data
2417 Returns: 0 when OK, < 0 on error
2420 static int
2421 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2423 int rc;
2425 if (pcre_mode == PCRE32_MODE)
2426 #ifdef SUPPORT_PCRE32
2427 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2428 #else
2429 rc = PCRE_ERROR_BADMODE;
2430 #endif
2431 else if (pcre_mode == PCRE16_MODE)
2432 #ifdef SUPPORT_PCRE16
2433 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2434 #else
2435 rc = PCRE_ERROR_BADMODE;
2436 #endif
2437 else
2438 #ifdef SUPPORT_PCRE8
2439 rc = pcre_fullinfo(re, study, option, ptr);
2440 #else
2441 rc = PCRE_ERROR_BADMODE;
2442 #endif
2444 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2446 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2447 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2448 if (rc == PCRE_ERROR_BADMODE)
2449 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2450 "%d-bit mode\n", 8 * CHAR_SIZE,
2451 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2454 return rc;
2459 /*************************************************
2460 * Swap byte functions *
2461 *************************************************/
2463 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2464 value, respectively.
2466 Arguments:
2467 value any number
2469 Returns: the byte swapped value
2472 static pcre_uint32
2473 swap_uint32(pcre_uint32 value)
2475 return ((value & 0x000000ff) << 24) |
2476 ((value & 0x0000ff00) << 8) |
2477 ((value & 0x00ff0000) >> 8) |
2478 (value >> 24);
2481 static pcre_uint16
2482 swap_uint16(pcre_uint16 value)
2484 return (value >> 8) | (value << 8);
/*************************************************
*       Flip bytes in a compiled pattern         *
*************************************************/

/* This function is called if the 'F' option was present on a pattern that is
to be written to a file. We flip the bytes of all the integer fields in the
regex data block and the study block. In 16-bit mode this also flips relevant
bytes in the pattern itself. This is to make it possible to test PCRE's
ability to reload byte-flipped patterns, e.g. those compiled on a different
architecture.

Arguments:
  ere        the compiled pattern (8-bit or 16-bit layout)
  extra      the extra block, or NULL; its study data is flipped if present

The walk over the 16-bit code works opcode by opcode: "length" is the number of
code units that follow the opcode just handled and that must be swapped before
the next opcode is read. Character bit maps are skipped, not swapped. */

#if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
static void
regexflip8_or_16(pcre *ere, pcre_extra *extra)
{
  real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
#ifdef SUPPORT_PCRE16
  int op;
  /* These two are computed here, from header fields that are still in native
  byte order; the fields themselves are swapped further down. */
  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
  int length = re->name_count * re->name_entry_size;
#ifdef SUPPORT_UTF
  BOOL utf = (re->options & PCRE_UTF16) != 0;
  BOOL utf16_char = FALSE;
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_PCRE16 */

  /* Always flip the bytes in the main data block and study blocks. */

  re->magic_number = REVERSED_MAGIC_NUMBER;
  re->size = swap_uint32(re->size);
  re->options = swap_uint32(re->options);
  re->flags = swap_uint32(re->flags);
  re->limit_match = swap_uint32(re->limit_match);
  re->limit_recursion = swap_uint32(re->limit_recursion);
  re->first_char = swap_uint16(re->first_char);
  re->req_char = swap_uint16(re->req_char);
  re->max_lookbehind = swap_uint16(re->max_lookbehind);
  re->top_bracket = swap_uint16(re->top_bracket);
  re->top_backref = swap_uint16(re->top_backref);
  re->name_table_offset = swap_uint16(re->name_table_offset);
  re->name_entry_size = swap_uint16(re->name_entry_size);
  re->name_count = swap_uint16(re->name_count);
  re->ref_count = swap_uint16(re->ref_count);

  if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0) {
    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
    rsd->size = swap_uint32(rsd->size);
    rsd->flags = swap_uint32(rsd->flags);
    rsd->minlength = swap_uint32(rsd->minlength);
  }

  /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap
  bytes in the name table, if present, and then in the pattern itself. */

#ifdef SUPPORT_PCRE16
  if (pcre_mode != PCRE16_MODE) return;

  while(TRUE) {
    /* Swap previous characters. */
    while (length-- > 0) {
      *ptr = swap_uint16(*ptr);
      ptr++;
    }
#ifdef SUPPORT_UTF
    if (utf16_char) {
      /* ptr[-1] is the first unit of the character and has just been
      byte-swapped by the loop above, so it must be swapped back before it is
      tested. Testing the swapped value misses every real high surrogate (for
      example 0xd83d has become 0x3dd8) and wrongly matches the characters
      0x00d8-0x00db, whose swapped form looks like one. */

      if ((swap_uint16(ptr[-1]) & 0xfc00) == 0xd800) {
        /* We know that there is only one extra character in UTF-16. */
        *ptr = swap_uint16(*ptr);
        ptr++;
      }
    }
    utf16_char = FALSE;
#endif /* SUPPORT_UTF */

    /* Get next opcode. */

    length = 0;
    op = *ptr;
    *ptr++ = swap_uint16(op);

    switch (op) {
      case OP_END:
      return;

#ifdef SUPPORT_UTF
      /* Opcodes whose last fixed code unit is a character, which in UTF-16
      mode may be followed by a second (low surrogate) unit. */
      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_QUERY:
      case OP_MINQUERY:
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSSTAR:
      case OP_POSPLUS:
      case OP_POSQUERY:
      case OP_POSUPTO:
      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSSTARI:
      case OP_POSPLUSI:
      case OP_POSQUERYI:
      case OP_POSUPTOI:
      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:
      case OP_NOTPOSQUERY:
      case OP_NOTPOSUPTO:
      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      case OP_NOTPOSQUERYI:
      case OP_NOTPOSUPTOI:
      if (utf) utf16_char = TRUE;
#endif
      /* Fall through. */

      default:
      length = OP_lengths16[op] - 1;
      break;

      case OP_CLASS:
      case OP_NCLASS:
      /* Skip the character bit map. */
      ptr += 32/sizeof(pcre_uint16);
      length = 0;
      break;

      case OP_XCLASS:
      /* LINK_SIZE can be 1 or 2 in 16 bit mode. The size is read before it is
      swapped; the opcode, the link and the flags unit are not part of the
      remainder still to be swapped. */
      if (LINK_SIZE > 1)
        length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
          - (1 + LINK_SIZE + 1));
      else
        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));

      /* Reverse the size of the XCLASS instance. */
      *ptr = swap_uint16(*ptr);
      ptr++;
      if (LINK_SIZE > 1) {
        *ptr = swap_uint16(*ptr);
        ptr++;
      }

      /* The XCLASS flags unit; op is reused to hold it. */
      op = *ptr;
      *ptr = swap_uint16(op);
      ptr++;
      if ((op & XCL_MAP) != 0) {
        /* Skip the character bit map. */
        ptr += 32/sizeof(pcre_uint16);
        length -= 32/sizeof(pcre_uint16);
      }
      break;
    }
  }
  /* Control should never reach here in 16 bit mode. */
#endif /* SUPPORT_PCRE16 */
}
#endif /* SUPPORT_PCRE[8|16] */
/* 32-bit counterpart of the byte-flipping function: every integer field of the
regex data block and of the study block is byte-swapped, followed by the name
table and the compiled code. "pending" is the number of 32-bit units following
the opcode just handled that still have to be swapped before the next opcode is
read. Character bit maps are skipped, not swapped.

Arguments:
  ere        the compiled pattern (32-bit layout)
  extra      the extra block, or NULL; its study data is flipped if present
*/

#if defined SUPPORT_PCRE32
static void
regexflip_32(pcre *ere, pcre_extra *extra)
{
  real_pcre32 *re = (real_pcre32 *)ere;
  int op;
  /* Both taken from header fields while they are still in native order. */
  pcre_uint32 *cursor = (pcre_uint32 *)re + re->name_table_offset;
  int pending = re->name_count * re->name_entry_size;

  /* Always flip the bytes in the main data block and study blocks. */

  re->magic_number = REVERSED_MAGIC_NUMBER;
  re->size = swap_uint32(re->size);
  re->options = swap_uint32(re->options);
  re->flags = swap_uint32(re->flags);
  re->limit_match = swap_uint32(re->limit_match);
  re->limit_recursion = swap_uint32(re->limit_recursion);
  re->first_char = swap_uint32(re->first_char);
  re->req_char = swap_uint32(re->req_char);
  re->max_lookbehind = swap_uint16(re->max_lookbehind);
  re->top_bracket = swap_uint16(re->top_bracket);
  re->top_backref = swap_uint16(re->top_backref);
  re->name_table_offset = swap_uint16(re->name_table_offset);
  re->name_entry_size = swap_uint16(re->name_entry_size);
  re->name_count = swap_uint16(re->name_count);
  re->ref_count = swap_uint16(re->ref_count);

  if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0) {
    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
    rsd->size = swap_uint32(rsd->size);
    rsd->flags = swap_uint32(rsd->flags);
    rsd->minlength = swap_uint32(rsd->minlength);
  }

  /* In 32-bit mode we must swap bytes in the name table, if present, and then
  in the pattern itself. */

  for (;;) {
    /* Swap previous characters. */
    for (; pending > 0; pending--) {
      *cursor = swap_uint32(*cursor);
      cursor++;
    }

    /* Get next opcode. */

    pending = 0;
    op = *cursor;
    *cursor++ = swap_uint32(op);

    switch (op) {
      case OP_END:
      return;

      default:
      pending = OP_lengths32[op] - 1;
      break;

      case OP_CLASS:
      case OP_NCLASS:
      /* Skip the character bit map. */
      cursor += 32/sizeof(pcre_uint32);
      pending = 0;
      break;

      case OP_XCLASS:
      /* LINK_SIZE can only be 1 in 32-bit mode. The size is read before it is
      swapped. */
      pending = (int)((unsigned int)(cursor[0]) - (1 + LINK_SIZE + 1));

      /* Reverse the size of the XCLASS instance. */
      *cursor = swap_uint32(*cursor);
      cursor++;

      /* The XCLASS flags unit; op is reused to hold it. */
      op = *cursor;
      *cursor = swap_uint32(op);
      cursor++;
      if ((op & XCL_MAP) != 0) {
        /* Skip the character bit map. */
        cursor += 32/sizeof(pcre_uint32);
        pending -= 32/sizeof(pcre_uint32);
      }
      break;
    }
  }
  /* Control should never reach here in 32 bit mode. */
}

#endif /* SUPPORT_PCRE32 */
2783 static void
2784 regexflip(pcre *ere, pcre_extra *extra)
2786 #if defined SUPPORT_PCRE32
2787 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2788 regexflip_32(ere, extra);
2789 #endif
2790 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2791 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2792 regexflip8_or_16(ere, extra);
2793 #endif
2798 /*************************************************
2799 * Check match or recursion limit *
2800 *************************************************/
2802 static int
2803 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2804 int start_offset, int options, int *use_offsets, int use_size_offsets,
2805 int flag, unsigned long int *limit, int errnumber, const char *msg)
2807 int count;
2808 int min = 0;
2809 int mid = 64;
2810 int max = -1;
2812 extra->flags |= flag;
2814 for (;;)
2816 *limit = mid;
2818 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2819 use_offsets, use_size_offsets);
2821 if (count == errnumber)
2823 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2824 min = mid;
2825 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2828 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2829 count == PCRE_ERROR_PARTIAL)
2831 if (mid == min + 1)
2833 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2834 break;
2836 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2837 max = mid;
2838 mid = (min + mid)/2;
2840 else break; /* Some other error */
2843 extra->flags &= ~flag;
2844 return count;
2849 /*************************************************
2850 * Case-independent strncmp() function *
2851 *************************************************/
2854 Arguments:
2855 s first string
2856 t second string
2857 n number of characters to compare
2859 Returns: < 0, = 0, or > 0, according to the comparison
2862 static int
2863 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2865 while (n--)
2867 int c = tolower(*s++) - tolower(*t++);
2868 if (c) return c;
2870 return 0;
2875 /*************************************************
2876 * Check multicharacter option *
2877 *************************************************/
2879 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2880 a message and return 0 if there is no match.
2882 Arguments:
2883 p points after the leading '<'
2884 f file for error message
2885 nl TRUE to check only for newline settings
2886 stype "modifier" or "escape sequence"
2888 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2891 static int
2892 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2894 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2895 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2896 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2897 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2898 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2899 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2900 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2902 if (!nl)
2904 if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2907 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2908 return 0;
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command-line summary to stdout. Lines for features that were not
compiled in (16-bit, 32-bit, DFA, POSIX) are left out, and the readline note
depends on whether readline/libedit support was built.

NOTE(review): the blanks between the columns of the option lines look as if
they were collapsed when this listing was extracted; the text is reproduced
exactly as found. Confirm the intended alignment against a reference copy. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
  fputs(" -32 use the 32-bit library\n", stdout);
#endif
  fputs(" -b show compiled code\n"
        " -C show PCRE compile-time options and exit\n"
        " -C arg show a specific compile-time option and exit\n"
        " with its value if numeric (else 0). The arg can be:\n"
        " linksize internal link size [2, 3, 4]\n"
        " pcre8 8 bit library support enabled [0, 1]\n"
        " pcre16 16 bit library support enabled [0, 1]\n"
        " pcre32 32 bit library support enabled [0, 1]\n"
        " utf Unicode Transformation Format supported [0, 1]\n"
        " ucp Unicode Properties supported [0, 1]\n"
        " jit Just-in-time compiler supported [0, 1]\n"
        " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n"
        " bsr \\R type [ANYCRLF, ANY]\n"
        " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s force each pattern to be studied at basic level\n"
        " -s+ force each pattern to be studied, using JIT if available\n"
        " -s++ ditto, verifying when JIT was actually used\n"
        " -s+n force each pattern to be studied, using JIT if available,\n"
        " where 1 <= n <= 7 selects JIT options\n"
        " -s++n ditto, verifying when JIT was actually used\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n"
        " -T same as -t, but show total times at the end\n"
        " -TM same as -tm, but show total time at the end\n", stdout);
}
2978 /*************************************************
2979 * Main Program *
2980 *************************************************/
2982 /* Read lines from named file or stdin and write to named file or stdout; lines
2983 consist of a regular expression, in delimiters and optionally followed by
2984 options, followed by a set of test data, terminated by an empty line. */
2986 int main(int argc, char **argv)
2988 FILE *infile = stdin;
2989 const char *version;
2990 int options = 0;
2991 int study_options = 0;
2992 int default_find_match_limit = FALSE;
2993 pcre_uint32 default_options = 0;
2994 int op = 1;
2995 int timeit = 0;
2996 int timeitm = 0;
2997 int showtotaltimes = 0;
2998 int showinfo = 0;
2999 int showstore = 0;
3000 int force_study = -1;
3001 int force_study_options = 0;
3002 int quiet = 0;
3003 int size_offsets = 45;
3004 int size_offsets_max;
3005 int *offsets = NULL;
3006 int debug = 0;
3007 int done = 0;
3008 int all_use_dfa = 0;
3009 int verify_jit = 0;
3010 int yield = 0;
3011 int stack_size;
3012 pcre_uint8 *dbuffer = NULL;
3013 pcre_uint8 lockout[24] = { 0 };
3014 size_t dbuffer_size = 1u << 14;
3015 clock_t total_compile_time = 0;
3016 clock_t total_study_time = 0;
3017 clock_t total_match_time = 0;
3019 #if !defined NOPOSIX
3020 int posix = 0;
3021 int cflags = 0;
3022 #endif
3023 #if !defined NODFA
3024 int *dfa_workspace = NULL;
3025 #endif
3027 pcre_jit_stack *jit_stack = NULL;
3029 /* These vectors store, end-to-end, a list of zero-terminated captured
3030 substring names, each list itself being terminated by an empty name. Assume
3031 that 1024 is plenty long enough for the few names we'll be testing. It is
3032 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3033 for the actual memory, to ensure alignment. */
3035 pcre_uint32 copynames[1024];
3036 pcre_uint32 getnames[1024];
3038 #ifdef SUPPORT_PCRE32
3039 pcre_uint32 *cn32ptr;
3040 pcre_uint32 *gn32ptr;
3041 #endif
3043 #ifdef SUPPORT_PCRE16
3044 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3045 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3046 pcre_uint16 *cn16ptr;
3047 pcre_uint16 *gn16ptr;
3048 #endif
3050 #ifdef SUPPORT_PCRE8
3051 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3052 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3053 pcre_uint8 *cn8ptr;
3054 pcre_uint8 *gn8ptr;
3055 #endif
3057 /* Get buffers from malloc() so that valgrind will check their misuse when
3058 debugging. They grow automatically when very long lines are read. The 16-
3059 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3061 buffer = (pcre_uint8 *)malloc(buffer_size);
3062 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3064 /* The outfile variable is static so that new_malloc can use it. */
3066 outfile = stdout;
3068 /* The following _setmode() stuff is some Windows magic that tells its runtime
3069 library to translate CRLF into a single LF character. At least, that's what
3070 I've been told: never having used Windows I take this all on trust. Originally
3071 it set 0x8000, but then I was advised that _O_BINARY was better. */
3073 #if defined(_WIN32) || defined(WIN32)
3074 _setmode( _fileno( stdout ), _O_BINARY );
3075 #endif
3077 /* Get the version number: both pcre_version() and pcre16_version() give the
3078 same answer. We just need to ensure that we call one that is available. */
3080 #if defined SUPPORT_PCRE8
3081 version = pcre_version();
3082 #elif defined SUPPORT_PCRE16
3083 version = pcre16_version();
3084 #elif defined SUPPORT_PCRE32
3085 version = pcre32_version();
3086 #endif
3088 /* Scan options */
3090 while (argc > 1 && argv[op][0] == '-')
3092 pcre_uint8 *endptr;
3093 char *arg = argv[op];
3095 if (strcmp(arg, "-m") == 0) showstore = 1;
3096 else if (strcmp(arg, "-s") == 0) force_study = 0;
3098 else if (strncmp(arg, "-s+", 3) == 0)
3100 arg += 3;
3101 if (*arg == '+') { arg++; verify_jit = TRUE; }
3102 force_study = 1;
3103 if (*arg == 0)
3104 force_study_options = jit_study_bits[6];
3105 else if (*arg >= '1' && *arg <= '7')
3106 force_study_options = jit_study_bits[*arg - '1'];
3107 else goto BAD_ARG;
3109 else if (strcmp(arg, "-8") == 0)
3111 #ifdef SUPPORT_PCRE8
3112 pcre_mode = PCRE8_MODE;
3113 #else
3114 printf("** This version of PCRE was built without 8-bit support\n");
3115 exit(1);
3116 #endif
3118 else if (strcmp(arg, "-16") == 0)
3120 #ifdef SUPPORT_PCRE16
3121 pcre_mode = PCRE16_MODE;
3122 #else
3123 printf("** This version of PCRE was built without 16-bit support\n");
3124 exit(1);
3125 #endif
3127 else if (strcmp(arg, "-32") == 0)
3129 #ifdef SUPPORT_PCRE32
3130 pcre_mode = PCRE32_MODE;
3131 #else
3132 printf("** This version of PCRE was built without 32-bit support\n");
3133 exit(1);
3134 #endif
3136 else if (strcmp(arg, "-q") == 0) quiet = 1;
3137 else if (strcmp(arg, "-b") == 0) debug = 1;
3138 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3139 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3140 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3141 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3142 #if !defined NODFA
3143 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3144 #endif
3145 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3146 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3147 *endptr == 0))
3149 op++;
3150 argc--;
3152 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3153 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3155 int temp;
3156 int both = arg[2] == 0;
3157 showtotaltimes = arg[1] == 'T';
3158 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3159 *endptr == 0))
3161 timeitm = temp;
3162 op++;
3163 argc--;
3165 else timeitm = LOOPREPEAT;
3166 if (both) timeit = timeitm;
3168 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3169 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3170 *endptr == 0))
3172 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3173 printf("PCRE: -S not supported on this OS\n");
3174 exit(1);
3175 #else
3176 int rc;
3177 struct rlimit rlim;
3178 getrlimit(RLIMIT_STACK, &rlim);
3179 rlim.rlim_cur = stack_size * 1024 * 1024;
3180 rc = setrlimit(RLIMIT_STACK, &rlim);
3181 if (rc != 0)
3183 printf("PCRE: setrlimit() failed with error %d\n", rc);
3184 exit(1);
3186 op++;
3187 argc--;
3188 #endif
3190 #if !defined NOPOSIX
3191 else if (strcmp(arg, "-p") == 0) posix = 1;
3192 #endif
3193 else if (strcmp(arg, "-C") == 0)
3195 int rc;
3196 unsigned long int lrc;
3198 if (argc > 2)
3200 if (strcmp(argv[op + 1], "linksize") == 0)
3202 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3203 printf("%d\n", rc);
3204 yield = rc;
3206 #ifdef __VMS
3207 vms_setsymbol("LINKSIZE",0,yield );
3208 #endif
3210 else if (strcmp(argv[op + 1], "pcre8") == 0)
3212 #ifdef SUPPORT_PCRE8
3213 printf("1\n");
3214 yield = 1;
3215 #else
3216 printf("0\n");
3217 yield = 0;
3218 #endif
3219 #ifdef __VMS
3220 vms_setsymbol("PCRE8",0,yield );
3221 #endif
3223 else if (strcmp(argv[op + 1], "pcre16") == 0)
3225 #ifdef SUPPORT_PCRE16
3226 printf("1\n");
3227 yield = 1;
3228 #else
3229 printf("0\n");
3230 yield = 0;
3231 #endif
3232 #ifdef __VMS
3233 vms_setsymbol("PCRE16",0,yield );
3234 #endif
3236 else if (strcmp(argv[op + 1], "pcre32") == 0)
3238 #ifdef SUPPORT_PCRE32
3239 printf("1\n");
3240 yield = 1;
3241 #else
3242 printf("0\n");
3243 yield = 0;
3244 #endif
3245 #ifdef __VMS
3246 vms_setsymbol("PCRE32",0,yield );
3247 #endif
3249 else if (strcmp(argv[op + 1], "utf") == 0)
3251 #ifdef SUPPORT_PCRE8
3252 if (pcre_mode == PCRE8_MODE)
3253 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3254 #endif
3255 #ifdef SUPPORT_PCRE16
3256 if (pcre_mode == PCRE16_MODE)
3257 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3258 #endif
3259 #ifdef SUPPORT_PCRE32
3260 if (pcre_mode == PCRE32_MODE)
3261 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3262 #endif
3263 printf("%d\n", rc);
3264 yield = rc;
3265 #ifdef __VMS
3266 vms_setsymbol("UTF",0,yield );
3267 #endif
3269 else if (strcmp(argv[op + 1], "ucp") == 0)
3271 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3272 printf("%d\n", rc);
3273 yield = rc;
3275 else if (strcmp(argv[op + 1], "jit") == 0)
3277 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3278 printf("%d\n", rc);
3279 yield = rc;
3281 else if (strcmp(argv[op + 1], "newline") == 0)
3283 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3284 print_newline_config(rc, TRUE);
3286 else if (strcmp(argv[op + 1], "bsr") == 0)
3288 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3289 printf("%s\n", rc? "ANYCRLF" : "ANY");
3291 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3293 #ifdef EBCDIC
3294 printf("1\n");
3295 yield = 1;
3296 #else
3297 printf("0\n");
3298 #endif
3300 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3302 #ifdef EBCDIC
3303 printf("0x%02x\n", CHAR_LF);
3304 #else
3305 printf("0\n");
3306 #endif
3308 else
3310 printf("Unknown -C option: %s\n", argv[op + 1]);
3312 goto EXIT;
3315 /* No argument for -C: output all configuration information. */
3317 printf("PCRE version %s\n", version);
3318 printf("Compiled with\n");
3320 #ifdef EBCDIC
3321 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3322 #endif
3324 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3325 are set, either both UTFs are supported or both are not supported. */
3327 #ifdef SUPPORT_PCRE8
3328 printf(" 8-bit support\n");
3329 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3330 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3331 #endif
3332 #ifdef SUPPORT_PCRE16
3333 printf(" 16-bit support\n");
3334 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3335 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3336 #endif
3337 #ifdef SUPPORT_PCRE32
3338 printf(" 32-bit support\n");
3339 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3340 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3341 #endif
3343 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3344 printf(" %sUnicode properties support\n", rc? "" : "No ");
3345 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3346 if (rc)
3348 const char *arch;
3349 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3350 printf(" Just-in-time compiler support: %s\n", arch);
3352 else
3353 printf(" No just-in-time compiler support\n");
3354 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3355 print_newline_config(rc, FALSE);
3356 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3357 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3358 "all Unicode newlines");
3359 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3360 printf(" Internal link size = %d\n", rc);
3361 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3362 printf(" POSIX malloc threshold = %d\n", rc);
3363 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3364 printf(" Parentheses nest limit = %ld\n", lrc);
3365 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3366 printf(" Default match limit = %ld\n", lrc);
3367 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3368 printf(" Default recursion depth limit = %ld\n", lrc);
3369 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3370 printf(" Match recursion uses %s", rc? "stack" : "heap");
3371 if (showstore)
3373 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3374 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3376 printf("\n");
3377 goto EXIT;
3379 else if (strcmp(arg, "-help") == 0 ||
3380 strcmp(arg, "--help") == 0)
3382 usage();
3383 goto EXIT;
3385 else
3387 BAD_ARG:
3388 printf("** Unknown or malformed option %s\n", arg);
3389 usage();
3390 yield = 1;
3391 goto EXIT;
3393 op++;
3394 argc--;
3397 /* Get the store for the offsets vector, and remember what it was */
3399 size_offsets_max = size_offsets;
3400 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3401 if (offsets == NULL)
3403 printf("** Failed to get %d bytes of memory for offsets vector\n",
3404 (int)(size_offsets_max * sizeof(int)));
3405 yield = 1;
3406 goto EXIT;
3409 /* Sort out the input and output files */
3411 if (argc > 1)
3413 infile = fopen(argv[op], INPUT_MODE);
3414 if (infile == NULL)
3416 printf("** Failed to open %s\n", argv[op]);
3417 yield = 1;
3418 goto EXIT;
3422 if (argc > 2)
3424 outfile = fopen(argv[op+1], OUTPUT_MODE);
3425 if (outfile == NULL)
3427 printf("** Failed to open %s\n", argv[op+1]);
3428 yield = 1;
3429 goto EXIT;
3433 /* Set alternative malloc function */
3435 #ifdef SUPPORT_PCRE8
3436 pcre_malloc = new_malloc;
3437 pcre_free = new_free;
3438 pcre_stack_malloc = stack_malloc;
3439 pcre_stack_free = stack_free;
3440 #endif
3442 #ifdef SUPPORT_PCRE16
3443 pcre16_malloc = new_malloc;
3444 pcre16_free = new_free;
3445 pcre16_stack_malloc = stack_malloc;
3446 pcre16_stack_free = stack_free;
3447 #endif
3449 #ifdef SUPPORT_PCRE32
3450 pcre32_malloc = new_malloc;
3451 pcre32_free = new_free;
3452 pcre32_stack_malloc = stack_malloc;
3453 pcre32_stack_free = stack_free;
3454 #endif
3456 /* Heading line unless quiet */
3458 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3460 /* Main loop */
3462 while (!done)
3464 pcre *re = NULL;
3465 pcre_extra *extra = NULL;
3467 #if !defined NOPOSIX /* There are still compilers that require no indent */
3468 regex_t preg = { NULL, 0, 0} ;
3469 int do_posix = 0;
3470 #endif
3472 const char *error;
3473 pcre_uint8 *markptr;
3474 pcre_uint8 *p, *pp, *ppp;
3475 pcre_uint8 *to_file = NULL;
3476 const pcre_uint8 *tables = NULL;
3477 unsigned long int get_options;
3478 unsigned long int true_size, true_study_size = 0;
3479 size_t size;
3480 int do_allcaps = 0;
3481 int do_mark = 0;
3482 int do_study = 0;
3483 int no_force_study = 0;
3484 int do_debug = debug;
3485 int do_G = 0;
3486 int do_g = 0;
3487 int do_showinfo = showinfo;
3488 int do_showrest = 0;
3489 int do_showcaprest = 0;
3490 int do_flip = 0;
3491 int erroroffset, len, delimiter, poffset;
3493 #if !defined NODFA
3494 int dfa_matched = 0;
3495 #endif
3497 use_utf = 0;
3498 debug_lengths = 1;
3499 SET_PCRE_STACK_GUARD(NULL);
3501 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3502 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3503 fflush(outfile);
3505 p = buffer;
3506 while (isspace(*p)) p++;
3507 if (*p == 0) continue;
3509 /* Handle option lock-out setting */
3511 if (*p == '<' && p[1] == ' ')
3513 p += 2;
3514 while (isspace(*p)) p++;
3515 if (strncmp((char *)p, "forbid ", 7) == 0)
3517 p += 7;
3518 while (isspace(*p)) p++;
3519 pp = lockout;
3520 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3521 *pp++ = *p++;
3522 *pp = 0;
3524 else
3526 printf("** Unrecognized special command '%s'\n", p);
3527 yield = 1;
3528 goto EXIT;
3530 continue;
3533 /* See if the pattern is to be loaded pre-compiled from a file. */
3535 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3537 pcre_uint32 magic;
3538 pcre_uint8 sbuf[8];
3539 FILE *f;
3541 p++;
3542 if (*p == '!')
3544 do_debug = TRUE;
3545 do_showinfo = TRUE;
3546 p++;
3549 pp = p + (int)strlen((char *)p);
3550 while (isspace(pp[-1])) pp--;
3551 *pp = 0;
3553 f = fopen((char *)p, "rb");
3554 if (f == NULL)
3556 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3557 continue;
3559 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3561 true_size =
3562 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3563 true_study_size =
3564 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3566 re = (pcre *)new_malloc(true_size);
3567 if (re == NULL)
3569 printf("** Failed to get %d bytes of memory for pcre object\n",
3570 (int)true_size);
3571 yield = 1;
3572 goto EXIT;
3574 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3576 magic = REAL_PCRE_MAGIC(re);
3577 if (magic != MAGIC_NUMBER)
3579 if (swap_uint32(magic) == MAGIC_NUMBER)
3581 do_flip = 1;
3583 else
3585 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3586 new_free(re);
3587 fclose(f);
3588 continue;
3592 /* We hide the byte-invert info for little and big endian tests. */
3593 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3594 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3596 /* Now see if there is any following study data. */
3598 if (true_study_size != 0)
3600 pcre_study_data *psd;
3602 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3603 extra->flags = PCRE_EXTRA_STUDY_DATA;
3605 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3606 extra->study_data = psd;
3608 if (fread(psd, 1, true_study_size, f) != true_study_size)
3610 FAIL_READ:
3611 fprintf(outfile, "Failed to read data from %s\n", p);
3612 if (extra != NULL)
3614 PCRE_FREE_STUDY(extra);
3616 new_free(re);
3617 fclose(f);
3618 continue;
3620 fprintf(outfile, "Study data loaded from %s\n", p);
3621 do_study = 1; /* To get the data output if requested */
3623 else fprintf(outfile, "No study data\n");
3625 /* Flip the necessary bytes. */
3626 if (do_flip)
3628 int rc;
3629 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3630 if (rc == PCRE_ERROR_BADMODE)
3632 pcre_uint32 flags_in_host_byte_order;
3633 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3634 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3635 else
3636 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3637 /* Simulate the result of the function call below. */
3638 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3639 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3640 PCRE_INFO_OPTIONS);
3641 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3642 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3643 new_free(re);
3644 fclose(f);
3645 continue;
3649 /* Need to know if UTF-8 for printing data strings. */
3651 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3653 new_free(re);
3654 fclose(f);
3655 continue;
3657 use_utf = (get_options & PCRE_UTF8) != 0;
3659 fclose(f);
3660 goto SHOW_INFO;
3663 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3664 the pattern; if it isn't complete, read more. */
3666 delimiter = *p++;
3668 if (isalnum(delimiter) || delimiter == '\\')
3670 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3671 goto SKIP_DATA;
3674 pp = p;
3675 poffset = (int)(p - buffer);
3677 for(;;)
3679 while (*pp != 0)
3681 if (*pp == '\\' && pp[1] != 0) pp++;
3682 else if (*pp == delimiter) break;
3683 pp++;
3685 if (*pp != 0) break;
3686 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3688 fprintf(outfile, "** Unexpected EOF\n");
3689 done = 1;
3690 goto CONTINUE;
3692 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3695 /* The buffer may have moved while being extended; reset the start of data
3696 pointer to the correct relative point in the buffer. */
3698 p = buffer + poffset;
3700 /* If the first character after the delimiter is backslash, make
3701 the pattern end with backslash. This is purely to provide a way
3702 of testing for the error message when a pattern ends with backslash. */
3704 if (pp[1] == '\\') *pp++ = '\\';
3706 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3707 for callouts. */
3709 *pp++ = 0;
3710 strcpy((char *)pbuffer, (char *)p);
3712 /* Look for modifiers and options after the final delimiter. */
3714 options = default_options;
3715 study_options = force_study_options;
3716 log_store = showstore; /* default from command line */
3718 while (*pp != 0)
3720 /* Check to see whether this modifier has been locked out for this file.
3721 This is complicated for the multi-character options that begin with '<'.
3722 If there is no '>' in the lockout string, all multi-character modifiers are
3723 locked out. */
3725 if (strchr((char *)lockout, *pp) != NULL)
3727 if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3729 int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3730 if (x == 0) goto SKIP_DATA;
3732 for (ppp = lockout; *ppp != 0; ppp++)
3734 if (*ppp == '<')
3736 int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3737 if (y == 0)
3739 printf("** Error in modifier forbid data - giving up.\n");
3740 yield = 1;
3741 goto EXIT;
3743 if (x == y)
3745 ppp = pp;
3746 while (*ppp != '>') ppp++;
3747 printf("** The %.*s modifier is locked out - giving up.\n",
3748 (int)(ppp - pp + 1), pp);
3749 yield = 1;
3750 goto EXIT;
3756 /* The single-character modifiers are straightforward. */
3758 else
3760 printf("** The /%c modifier is locked out - giving up.\n", *pp);
3761 yield = 1;
3762 goto EXIT;
3766 /* The modifier is not locked out; handle it. */
3768 switch (*pp++)
3770 case 'f': options |= PCRE_FIRSTLINE; break;
3771 case 'g': do_g = 1; break;
3772 case 'i': options |= PCRE_CASELESS; break;
3773 case 'm': options |= PCRE_MULTILINE; break;
3774 case 's': options |= PCRE_DOTALL; break;
3775 case 'x': options |= PCRE_EXTENDED; break;
3777 case '+':
3778 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3779 break;
3781 case '=': do_allcaps = 1; break;
3782 case 'A': options |= PCRE_ANCHORED; break;
3783 case 'B': do_debug = 1; break;
3784 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3785 case 'D': do_debug = do_showinfo = 1; break;
3786 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3787 case 'F': do_flip = 1; break;
3788 case 'G': do_G = 1; break;
3789 case 'I': do_showinfo = 1; break;
3790 case 'J': options |= PCRE_DUPNAMES; break;
3791 case 'K': do_mark = 1; break;
3792 case 'V': options |= PCRE_VERBATIM; break;
3793 case 'M': log_store = 1; break;
3794 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3795 case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3797 #if !defined NOPOSIX
3798 case 'P': do_posix += 1; break;
3799 #endif
3801 case 'Q':
3802 switch (*pp)
3804 case '0':
3805 case '1':
3806 stack_guard_return = *pp++ - '0';
3807 break;
3809 default:
3810 fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3811 goto SKIP_DATA;
3813 SET_PCRE_STACK_GUARD(stack_guard);
3814 break;
3816 case 'S':
3817 do_study = 1;
3818 for (;;)
3820 switch (*pp++)
3822 case 'S':
3823 do_study = 0;
3824 no_force_study = 1;
3825 break;
3827 case '!':
3828 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3829 break;
3831 case '+':
3832 if (*pp == '+')
3834 verify_jit = TRUE;
3835 pp++;
3837 if (*pp >= '1' && *pp <= '7')
3838 study_options |= jit_study_bits[*pp++ - '1'];
3839 else
3840 study_options |= jit_study_bits[6];
3841 break;
3843 case '-':
3844 study_options &= ~PCRE_STUDY_ALLJIT;
3845 break;
3847 default:
3848 pp--;
3849 goto ENDLOOP;
3852 ENDLOOP:
3853 break;
3855 case 'U': options |= PCRE_UNGREEDY; break;
3856 case 'W': options |= PCRE_UCP; break;
3857 case 'X': options |= PCRE_EXTRA; break;
3858 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3859 case 'Z': debug_lengths = 0; break;
3860 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3861 case '9': options |= PCRE_NEVER_UTF; break;
3862 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3864 case 'T':
3865 switch (*pp++)
3867 case '0': tables = tables0; break;
3868 case '1': tables = tables1; break;
3870 case '\r':
3871 case '\n':
3872 case ' ':
3873 case 0:
3874 fprintf(outfile, "** Missing table number after /T\n");
3875 goto SKIP_DATA;
3877 default:
3878 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3879 goto SKIP_DATA;
3881 break;
3883 case 'L':
3884 ppp = pp;
3885 /* The '\r' test here is so that it works on Windows. */
3886 /* The '0' test is just in case this is an unterminated line. */
3887 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3888 *ppp = 0;
3889 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3891 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3892 goto SKIP_DATA;
3894 locale_set = 1;
3895 tables = PCRE_MAKETABLES;
3896 pp = ppp;
3897 break;
3899 case '>':
3900 to_file = pp;
3901 while (*pp != 0) pp++;
3902 while (isspace(pp[-1])) pp--;
3903 *pp = 0;
3904 break;
3906 case '<':
3908 int x = check_mc_option(pp, outfile, FALSE, "modifier");
3909 if (x == 0) goto SKIP_DATA;
3910 options |= x;
3911 while (*pp++ != '>');
3913 break;
3915 case '\r': /* So that it works in Windows */
3916 case '\n':
3917 case ' ':
3918 break;
3920 default:
3921 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3922 goto SKIP_DATA;
3926 /* Handle compiling via the POSIX interface, which doesn't support the
3927 timing, showing, or debugging options, nor the ability to pass over
3928 local character tables. Neither does it have 16-bit support. */
3930 #if !defined NOPOSIX
3931 if (posix || do_posix)
3933 int rc;
3934 cflags = do_posix < 2 ? REG_EXTENDED : REG_BASIC;
3936 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3937 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3938 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3939 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3940 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3941 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3942 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3944 rc = regcomp(&preg, (char *)p, cflags);
3946 /* Compilation failed; go back for another re, skipping to blank line
3947 if non-interactive. */
3949 if (rc != 0)
3951 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3952 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3953 goto SKIP_DATA;
3957 /* Handle compiling via the native interface */
3959 else
3960 #endif /* !defined NOPOSIX */
3963 /* In 16- or 32-bit mode, convert the input. */
3965 #ifdef SUPPORT_PCRE16
3966 if (pcre_mode == PCRE16_MODE)
3968 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3970 case -1:
3971 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3972 "converted to UTF-16\n");
3973 goto SKIP_DATA;
3975 case -2:
3976 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3977 "cannot be converted to UTF-16\n");
3978 goto SKIP_DATA;
3980 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3981 fprintf(outfile, "**Failed: character value greater than 0xffff "
3982 "cannot be converted to 16-bit in non-UTF mode\n");
3983 goto SKIP_DATA;
3985 default:
3986 break;
3988 p = (pcre_uint8 *)buffer16;
3990 #endif
3992 #ifdef SUPPORT_PCRE32
3993 if (pcre_mode == PCRE32_MODE)
3995 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3997 case -1:
3998 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3999 "converted to UTF-32\n");
4000 goto SKIP_DATA;
4002 case -2:
4003 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4004 "cannot be converted to UTF-32\n");
4005 goto SKIP_DATA;
4007 case -3:
4008 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4009 goto SKIP_DATA;
4011 default:
4012 break;
4014 p = (pcre_uint8 *)buffer32;
4016 #endif
4018 /* Compile many times when timing */
4020 if (timeit > 0)
4022 register int i;
4023 clock_t time_taken;
4024 clock_t start_time = clock();
4025 for (i = 0; i < timeit; i++)
4027 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4028 if (re != NULL) free(re);
4030 total_compile_time += (time_taken = clock() - start_time);
4031 fprintf(outfile, "Compile time %.4f milliseconds\n",
4032 (((double)time_taken * 1000.0) / (double)timeit) /
4033 (double)CLOCKS_PER_SEC);
4036 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4038 /* Compilation failed; go back for another re, skipping to blank line
4039 if non-interactive. */
4041 if (re == NULL)
4043 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4044 SKIP_DATA:
4045 if (infile != stdin)
4047 for (;;)
4049 if (extend_inputline(infile, buffer, NULL) == NULL)
4051 done = 1;
4052 goto CONTINUE;
4054 len = (int)strlen((char *)buffer);
4055 while (len > 0 && isspace(buffer[len-1])) len--;
4056 if (len == 0) break;
4058 fprintf(outfile, "\n");
4060 goto CONTINUE;
4063 /* Compilation succeeded. It is now possible to set the UTF-8 option from
4064 within the regex; check for this so that we know how to process the data
4065 lines. */
4067 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4068 goto SKIP_DATA;
4069 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4071 /* Extract the size for possible writing before possibly flipping it,
4072 and remember the store that was got. */
4074 true_size = REAL_PCRE_SIZE(re);
4076 /* Output code size information if requested */
4078 if (log_store)
4080 int name_count, name_entry_size, real_pcre_size;
4082 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4083 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4084 real_pcre_size = 0;
4085 #ifdef SUPPORT_PCRE8
4086 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4087 real_pcre_size = sizeof(real_pcre);
4088 #endif
4089 #ifdef SUPPORT_PCRE16
4090 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4091 real_pcre_size = sizeof(real_pcre16);
4092 #endif
4093 #ifdef SUPPORT_PCRE32
4094 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4095 real_pcre_size = sizeof(real_pcre32);
4096 #endif
4097 new_info(re, NULL, PCRE_INFO_SIZE, &size);
4098 fprintf(outfile, "Memory allocation (code space): %d\n",
4099 (int)(size - real_pcre_size - name_count * name_entry_size));
4102 /* If -s or /S was present, study the regex to generate additional info to
4103 help with the matching, unless the pattern has the SS option, which
4104 suppresses the effect of /S (used for a few test patterns where studying is
4105 never sensible). */
4107 if (do_study || (force_study >= 0 && !no_force_study))
4109 if (timeit > 0)
4111 register int i;
4112 clock_t time_taken;
4113 clock_t start_time = clock();
4114 for (i = 0; i < timeit; i++)
4116 PCRE_STUDY(extra, re, study_options, &error);
4118 total_study_time = (time_taken = clock() - start_time);
4119 if (extra != NULL)
4121 PCRE_FREE_STUDY(extra);
4123 fprintf(outfile, " Study time %.4f milliseconds\n",
4124 (((double)time_taken * 1000.0) / (double)timeit) /
4125 (double)CLOCKS_PER_SEC);
4127 PCRE_STUDY(extra, re, study_options, &error);
4128 if (error != NULL)
4129 fprintf(outfile, "Failed to study: %s\n", error);
4130 else if (extra != NULL)
4132 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4133 if (log_store)
4135 size_t jitsize;
4136 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4137 jitsize != 0)
4138 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4143 /* If /K was present, we set up for handling MARK data. */
4145 if (do_mark)
4147 if (extra == NULL)
4149 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4150 extra->flags = 0;
4152 extra->mark = &markptr;
4153 extra->flags |= PCRE_EXTRA_MARK;
4156 /* Extract and display information from the compiled data if required. */
4158 SHOW_INFO:
4160 if (do_debug)
4162 fprintf(outfile, "------------------------------------------------------------------\n");
4163 PCRE_PRINTINT(re, outfile, debug_lengths);
4166 /* We already have the options in get_options (see above) */
4168 if (do_showinfo)
4170 unsigned long int all_options;
4171 pcre_uint32 first_char, need_char;
4172 pcre_uint32 match_limit, recursion_limit;
4173 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4174 hascrorlf, maxlookbehind, match_empty;
4175 int nameentrysize, namecount;
4176 const pcre_uint8 *nametable;
4178 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4179 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4180 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4181 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4182 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4183 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4184 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4185 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4186 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4187 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4188 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4189 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4190 new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4191 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4192 != 0)
4193 goto SKIP_DATA;
4195 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4197 if (backrefmax > 0)
4198 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4200 if (maxlookbehind > 0)
4201 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4203 if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4204 fprintf(outfile, "Match limit = %u\n", match_limit);
4206 if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4207 fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4209 if (namecount > 0)
4211 fprintf(outfile, "Named capturing subpatterns:\n");
4212 while (namecount-- > 0)
4214 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4215 int length = (int)STRLEN(nametable + imm2_size);
4216 fprintf(outfile, " ");
4217 PCHARSV(nametable, imm2_size, length, outfile);
4218 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4219 #ifdef SUPPORT_PCRE32
4220 if (pcre_mode == PCRE32_MODE)
4221 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4222 #endif
4223 #ifdef SUPPORT_PCRE16
4224 if (pcre_mode == PCRE16_MODE)
4225 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4226 #endif
4227 #ifdef SUPPORT_PCRE8
4228 if (pcre_mode == PCRE8_MODE)
4229 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4230 #endif
4231 nametable += nameentrysize * CHAR_SIZE;
4235 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4236 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4237 if (match_empty) fprintf(outfile, "May match empty string\n");
4239 all_options = REAL_PCRE_OPTIONS(re);
4240 if (do_flip) all_options = swap_uint32(all_options);
4242 if (get_options == 0) fprintf(outfile, "No options\n");
4243 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4244 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4245 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4246 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4247 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4248 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4249 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4250 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4251 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4252 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4253 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4254 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4255 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4256 ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
4257 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4258 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4259 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4260 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4261 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4262 ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4264 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4266 switch (get_options & PCRE_NEWLINE_BITS)
4268 case PCRE_NEWLINE_CR:
4269 fprintf(outfile, "Forced newline sequence: CR\n");
4270 break;
4272 case PCRE_NEWLINE_LF:
4273 fprintf(outfile, "Forced newline sequence: LF\n");
4274 break;
4276 case PCRE_NEWLINE_CRLF:
4277 fprintf(outfile, "Forced newline sequence: CRLF\n");
4278 break;
4280 case PCRE_NEWLINE_ANYCRLF:
4281 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4282 break;
4284 case PCRE_NEWLINE_ANY:
4285 fprintf(outfile, "Forced newline sequence: ANY\n");
4286 break;
4288 default:
4289 break;
4292 if (first_char_set == 2)
4294 fprintf(outfile, "First char at start or follows newline\n");
4296 else if (first_char_set == 1)
4298 const char *caseless =
4299 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4300 "" : " (caseless)";
4302 if (PRINTOK(first_char))
4303 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4304 else
4306 fprintf(outfile, "First char = ");
4307 pchar(first_char, outfile);
4308 fprintf(outfile, "%s\n", caseless);
4311 else
4313 fprintf(outfile, "No first char\n");
4316 if (need_char_set == 0)
4318 fprintf(outfile, "No need char\n");
4320 else
4322 const char *caseless =
4323 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4324 "" : " (caseless)";
4326 if (PRINTOK(need_char))
4327 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4328 else
4330 fprintf(outfile, "Need char = ");
4331 pchar(need_char, outfile);
4332 fprintf(outfile, "%s\n", caseless);
4336 /* Don't output study size; at present it is in any case a fixed
4337 value, but it varies, depending on the computer architecture, and
4338 so messes up the test suite. (And with the /F option, it might be
4339 flipped.) If study was forced by an external -s, don't show this
4340 information unless -i or -d was also present. This means that, except
4341 when auto-callouts are involved, the output from runs with and without
4342 -s should be identical. */
4344 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4346 if (extra == NULL)
4347 fprintf(outfile, "Study returned NULL\n");
4348 else
4350 pcre_uint8 *start_bits = NULL;
4351 int minlength;
4353 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4354 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4356 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4358 if (start_bits == NULL)
4359 fprintf(outfile, "No starting char list\n");
4360 else
4362 int i;
4363 int c = 24;
4364 fprintf(outfile, "Starting chars: ");
4365 for (i = 0; i < 256; i++)
4367 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4369 if (c > 75)
4371 fprintf(outfile, "\n ");
4372 c = 2;
4374 if (PRINTOK(i) && i != ' ')
4376 fprintf(outfile, "%c ", i);
4377 c += 2;
4379 else
4381 fprintf(outfile, "\\x%02x ", i);
4382 c += 5;
4386 fprintf(outfile, "\n");
4391 /* Show this only if the JIT was set by /S, not by -s. */
4393 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4394 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4396 int jit;
4397 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4399 if (jit)
4400 fprintf(outfile, "JIT study was successful\n");
4401 else
4402 #ifdef SUPPORT_JIT
4403 fprintf(outfile, "JIT study was not successful\n");
4404 #else
4405 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4406 #endif
4412 /* If the '>' option was present, we write out the regex to a file, and
4413 that is all. The first 8 bytes of the file are the regex length and then
4414 the study length, in big-endian order. */
4416 if (to_file != NULL)
4418 FILE *f = fopen((char *)to_file, "wb");
4419 if (f == NULL)
4421 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4423 else
4425 pcre_uint8 sbuf[8];
4427 if (do_flip) regexflip(re, extra);
4428 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4429 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4430 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4431 sbuf[3] = (pcre_uint8)((true_size) & 255);
4432 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4433 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4434 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4435 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4437 if (fwrite(sbuf, 1, 8, f) < 8 ||
4438 fwrite(re, 1, true_size, f) < true_size)
4440 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4442 else
4444 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4446 /* If there is study data, write it. */
4448 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
4450 if (fwrite(extra->study_data, 1, true_study_size, f) <
4451 true_study_size)
4453 fprintf(outfile, "Write error on %s: %s\n", to_file,
4454 strerror(errno));
4456 else fprintf(outfile, "Study data written to %s\n", to_file);
4459 fclose(f);
4462 new_free(re);
4463 if (extra != NULL)
4465 PCRE_FREE_STUDY(extra);
4467 if (locale_set)
4469 new_free((void *)tables);
4470 setlocale(LC_CTYPE, "C");
4471 locale_set = 0;
4473 continue; /* With next regex */
4475 } /* End of non-POSIX compile */
4477 /* Read data lines and test them */
4479 for (;;)
4481 #ifdef SUPPORT_PCRE8
4482 pcre_uint8 *q8;
4483 #endif
4484 #ifdef SUPPORT_PCRE16
4485 pcre_uint16 *q16;
4486 #endif
4487 #ifdef SUPPORT_PCRE32
4488 pcre_uint32 *q32;
4489 #endif
4490 pcre_uint8 *bptr;
4491 int *use_offsets = offsets;
4492 int use_size_offsets = size_offsets;
4493 int callout_data = 0;
4494 int callout_data_set = 0;
4495 int count;
4496 pcre_uint32 c;
4497 int copystrings = 0;
4498 int find_match_limit = default_find_match_limit;
4499 int getstrings = 0;
4500 int getlist = 0;
4501 int gmatched = 0;
4502 int start_offset = 0;
4503 int start_offset_sign = 1;
4504 int g_notempty = 0;
4505 int use_dfa = 0;
4507 *copynames = 0;
4508 *getnames = 0;
4510 #ifdef SUPPORT_PCRE32
4511 cn32ptr = copynames;
4512 gn32ptr = getnames;
4513 #endif
4514 #ifdef SUPPORT_PCRE16
4515 cn16ptr = copynames16;
4516 gn16ptr = getnames16;
4517 #endif
4518 #ifdef SUPPORT_PCRE8
4519 cn8ptr = copynames8;
4520 gn8ptr = getnames8;
4521 #endif
4523 SET_PCRE_CALLOUT(callout);
4524 first_callout = 1;
4525 last_callout_mark = NULL;
4526 callout_extra = 0;
4527 callout_count = 0;
4528 callout_fail_count = 999999;
4529 callout_fail_id = -1;
4530 show_malloc = 0;
4531 options = 0;
4533 if (extra != NULL) extra->flags &=
4534 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4536 len = 0;
4537 for (;;)
4539 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4541 if (len > 0) /* Reached EOF without hitting a newline */
4543 fprintf(outfile, "\n");
4544 break;
4546 done = 1;
4547 goto CONTINUE;
4549 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4550 len = (int)strlen((char *)buffer);
4551 if (buffer[len-1] == '\n') break;
4554 while (len > 0 && isspace(buffer[len-1])) len--;
4555 buffer[len] = 0;
4556 if (len == 0) break;
4558 p = buffer;
4559 while (isspace(*p)) p++;
4561 #ifndef NOUTF
4562 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4563 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4565 if (use_utf)
4567 pcre_uint8 *q;
4568 pcre_uint32 cc;
4569 int n = 1;
4571 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4572 if (n <= 0)
4574 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4575 goto NEXT_DATA;
4578 #endif
4580 #ifdef SUPPORT_VALGRIND
4581 /* Mark the dbuffer as addressable but undefined again. */
4583 if (dbuffer != NULL)
4585 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4587 #endif
4589 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4590 the number of pcre_uchar units that will be needed. */
4592 while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4594 dbuffer_size *= 2;
4595 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4596 if (dbuffer == NULL)
4598 fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4599 exit(1);
4603 #ifdef SUPPORT_PCRE8
4604 q8 = (pcre_uint8 *) dbuffer;
4605 #endif
4606 #ifdef SUPPORT_PCRE16
4607 q16 = (pcre_uint16 *) dbuffer;
4608 #endif
4609 #ifdef SUPPORT_PCRE32
4610 q32 = (pcre_uint32 *) dbuffer;
4611 #endif
4613 while ((c = *p++) != 0)
4615 int i = 0;
4616 int n = 0;
4618 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4619 In non-UTF mode, allow the value of the byte to fall through to later,
4620 where values greater than 127 are turned into UTF-8 when running in
4621 16-bit or 32-bit mode. */
4623 if (c != '\\')
4625 #ifndef NOUTF
4626 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4627 #endif
4630 /* Handle backslash escapes */
4632 else switch ((c = *p++))
4634 case 'a': c = CHAR_BEL; break;
4635 case 'b': c = '\b'; break;
4636 case 'e': c = CHAR_ESC; break;
4637 case 'f': c = '\f'; break;
4638 case 'n': c = '\n'; break;
4639 case 'r': c = '\r'; break;
4640 case 't': c = '\t'; break;
4641 case 'v': c = '\v'; break;
4643 case '0': case '1': case '2': case '3':
4644 case '4': case '5': case '6': case '7':
4645 c -= '0';
4646 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4647 c = c * 8 + *p++ - '0';
4648 break;
4650 case 'o':
4651 if (*p == '{')
4653 pcre_uint8 *pt = p;
4654 c = 0;
4655 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
4657 if (++i == 12)
4658 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
4659 "using only the first twelve.\n");
4660 else c = c * 8 + *pt - '0';
4662 if (*pt == '}') p = pt + 1;
4663 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
4665 break;
4667 case 'x':
4668 if (*p == '{')
4670 pcre_uint8 *pt = p;
4671 c = 0;
4673 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4674 when isxdigit() is a macro that refers to its argument more than
4675 once. This is banned by the C Standard, but apparently happens in at
4676 least one MacOS environment. */
4678 for (pt++; isxdigit(*pt); pt++)
4680 if (++i == 9)
4681 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4682 "using only the first eight.\n");
4683 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4685 if (*pt == '}')
4687 p = pt + 1;
4688 break;
4690 /* Not correct form for \x{...}; fall through */
4693 /* \x without {} always defines just one byte in 8-bit mode. This
4694 allows UTF-8 characters to be constructed byte by byte, and also allows
4695 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4696 Otherwise, pass it down to later code so that it can be turned into
4697 UTF-8 when running in 16/32-bit mode. */
4699 c = 0;
4700 while (i++ < 2 && isxdigit(*p))
4702 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4703 p++;
4705 #if !defined NOUTF && defined SUPPORT_PCRE8
4706 if (use_utf && (pcre_mode == PCRE8_MODE))
4708 *q8++ = c;
4709 continue;
4711 #endif
4712 break;
4714 case 0: /* \ followed by EOF allows for an empty line */
4715 p--;
4716 continue;
4718 case '>':
4719 if (*p == '-')
4721 start_offset_sign = -1;
4722 p++;
4724 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4725 start_offset *= start_offset_sign;
4726 continue;
4728 case 'A': /* Option setting */
4729 options |= PCRE_ANCHORED;
4730 continue;
4732 case 'B':
4733 options |= PCRE_NOTBOL;
4734 continue;
4736 case 'C':
4737 if (isdigit(*p)) /* Set copy string */
4739 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4740 copystrings |= 1U << n;
4742 else if (isalnum(*p))
4744 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4746 else if (*p == '+')
4748 callout_extra = 1;
4749 p++;
4751 else if (*p == '-')
4753 SET_PCRE_CALLOUT(NULL);
4754 p++;
4756 else if (*p == '!')
4758 callout_fail_id = 0;
4759 p++;
4760 while(isdigit(*p))
4761 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4762 callout_fail_count = 0;
4763 if (*p == '!')
4765 p++;
4766 while(isdigit(*p))
4767 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4770 else if (*p == '*')
4772 int sign = 1;
4773 callout_data = 0;
4774 if (*(++p) == '-') { sign = -1; p++; }
4775 while(isdigit(*p))
4776 callout_data = callout_data * 10 + *p++ - '0';
4777 callout_data *= sign;
4778 callout_data_set = 1;
4780 continue;
4782 #if !defined NODFA
4783 case 'D':
4784 #if !defined NOPOSIX
4785 if (posix || do_posix)
4786 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4787 else
4788 #endif
4789 use_dfa = 1;
4790 continue;
4791 #endif
4793 #if !defined NODFA
4794 case 'F':
4795 options |= PCRE_DFA_SHORTEST;
4796 continue;
4797 #endif
4799 case 'G':
4800 if (isdigit(*p))
4802 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4803 getstrings |= 1U << n;
4805 else if (isalnum(*p))
4807 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4809 continue;
4811 case 'J':
4812 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4813 if (extra != NULL
4814 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4815 && extra->executable_jit != NULL)
4817 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4818 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4819 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4821 continue;
4823 case 'L':
4824 getlist = 1;
4825 continue;
4827 case 'M':
4828 find_match_limit = 1;
4829 continue;
4831 case 'N':
4832 if ((options & PCRE_NOTEMPTY) != 0)
4833 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4834 else
4835 options |= PCRE_NOTEMPTY;
4836 continue;
4838 case 'O':
4839 while(isdigit(*p))
4841 if (n > (INT_MAX-10)/10) /* Hack to stop fuzzers */
4843 printf("** \\O argument is too big\n");
4844 yield = 1;
4845 goto EXIT;
4847 n = n * 10 + *p++ - '0';
4849 if (n > size_offsets_max)
4851 size_offsets_max = n;
4852 free(offsets);
4853 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4854 if (offsets == NULL)
4856 printf("** Failed to get %d bytes of memory for offsets vector\n",
4857 (int)(size_offsets_max * sizeof(int)));
4858 yield = 1;
4859 goto EXIT;
4862 use_size_offsets = n;
4863 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4864 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4865 continue;
4867 case 'P':
4868 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4869 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4870 continue;
4872 case 'Q':
4873 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4874 if (extra == NULL)
4876 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4877 extra->flags = 0;
4879 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4880 extra->match_limit_recursion = n;
4881 continue;
4883 case 'q':
4884 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4885 if (extra == NULL)
4887 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4888 extra->flags = 0;
4890 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4891 extra->match_limit = n;
4892 continue;
4894 #if !defined NODFA
4895 case 'R':
4896 options |= PCRE_DFA_RESTART;
4897 continue;
4898 #endif
4900 case 'S':
4901 show_malloc = 1;
4902 continue;
4904 case 'Y':
4905 options |= PCRE_NO_START_OPTIMIZE;
4906 continue;
4908 case 'Z':
4909 options |= PCRE_NOTEOL;
4910 continue;
4912 case '?':
4913 options |= PCRE_NO_UTF8_CHECK;
4914 continue;
4916 case '<':
4918 int x = check_mc_option(p, outfile, TRUE, "escape sequence");
4919 if (x == 0) goto NEXT_DATA;
4920 options |= x;
4921 while (*p++ != '>');
4923 continue;
4926 /* We now have a character value in c that may be greater than 255.
4927 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4928 than 127 in UTF mode must have come from \x{...} or octal constructs
4929 because values from \x.. get this far only in non-UTF mode. */
4931 #ifdef SUPPORT_PCRE8
4932 if (pcre_mode == PCRE8_MODE)
4934 #ifndef NOUTF
4935 if (use_utf)
4937 if (c > 0x7fffffff)
4939 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4940 "and so cannot be converted to UTF-8\n", c);
4941 goto NEXT_DATA;
4943 q8 += ord2utf8(c, q8);
4945 else
4946 #endif
4948 if (c > 0xffu)
4950 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4951 "and UTF-8 mode is not enabled.\n", c);
4952 fprintf(outfile, "** Truncation will probably give the wrong "
4953 "result.\n");
4955 *q8++ = c;
4958 #endif
4959 #ifdef SUPPORT_PCRE16
4960 if (pcre_mode == PCRE16_MODE)
4962 #ifndef NOUTF
4963 if (use_utf)
4965 if (c > 0x10ffffu)
4967 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4968 "0x10ffff and so cannot be converted to UTF-16\n", c);
4969 goto NEXT_DATA;
4971 else if (c >= 0x10000u)
4973 c-= 0x10000u;
4974 *q16++ = 0xD800 | (c >> 10);
4975 *q16++ = 0xDC00 | (c & 0x3ff);
4977 else
4978 *q16++ = c;
4980 else
4981 #endif
4983 if (c > 0xffffu)
4985 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4986 "and UTF-16 mode is not enabled.\n", c);
4987 fprintf(outfile, "** Truncation will probably give the wrong "
4988 "result.\n");
4991 *q16++ = c;
4994 #endif
4995 #ifdef SUPPORT_PCRE32
4996 if (pcre_mode == PCRE32_MODE)
4998 *q32++ = c;
5000 #endif
5004 /* Reached end of subject string */
5006 #ifdef SUPPORT_PCRE8
5007 if (pcre_mode == PCRE8_MODE)
5009 *q8 = 0;
5010 len = (int)(q8 - (pcre_uint8 *)dbuffer);
5012 #endif
5013 #ifdef SUPPORT_PCRE16
5014 if (pcre_mode == PCRE16_MODE)
5016 *q16 = 0;
5017 len = (int)(q16 - (pcre_uint16 *)dbuffer);
5019 #endif
5020 #ifdef SUPPORT_PCRE32
5021 if (pcre_mode == PCRE32_MODE)
5023 *q32 = 0;
5024 len = (int)(q32 - (pcre_uint32 *)dbuffer);
5026 #endif
5028 /* If we're compiling with explicit valgrind support, mark the memory from just
5029 after the data's end to the end of the buffer as unaddressable, so that a read
5030 past the end of the data will be seen by valgrind, even if it doesn't cause a
5031 crash. If we're not building with valgrind support, move the data to the end of
5032 the buffer instead, so that an overrun at least has a chance of causing a crash.
5033 If we are using the POSIX interface, we must include the terminating zero. */
5035 bptr = dbuffer;
5037 #if !defined NOPOSIX
5038 if (posix || do_posix)
5040 #ifdef SUPPORT_VALGRIND
5041 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
5042 #else
5043 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5044 bptr += dbuffer_size - len - 1;
5045 #endif
5047 else
5048 #endif
5050 #ifdef SUPPORT_VALGRIND
5051 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
5052 #else
5053 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5054 #endif
5057 if ((all_use_dfa || use_dfa) && find_match_limit)
5059 printf("** Match limit not relevant for DFA matching: ignored\n");
5060 find_match_limit = 0;
5063 /* Handle matching via the POSIX interface, which does not
5064 support timing or playing with the match limit or callout data. */
5066 #if !defined NOPOSIX
5067 if (posix || do_posix)
5069 int rc;
5070 int eflags = 0;
5071 regmatch_t *pmatch = NULL;
5072 if (use_size_offsets > 0)
5073 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5074 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5075 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5076 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5078 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5080 if (rc != 0)
5082 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5083 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5085 else if ((cflags & REG_NOSUB) != 0)
5087 fprintf(outfile, "Matched with REG_NOSUB\n");
5089 else
5091 size_t i;
5092 for (i = 0; i < (size_t)use_size_offsets; i++)
5094 if (pmatch[i].rm_so >= 0)
5096 fprintf(outfile, "%2d: ", (int)i);
5097 PCHARSV(dbuffer, pmatch[i].rm_so,
5098 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5099 fprintf(outfile, "\n");
5100 if (do_showcaprest || (i == 0 && do_showrest))
5102 fprintf(outfile, "%2d+ ", (int)i);
5103 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5104 outfile);
5105 fprintf(outfile, "\n");
5110 free(pmatch);
5111 goto NEXT_DATA;
5114 #endif /* !defined NOPOSIX */
5116 /* Handle matching via the native interface - repeats for /g and /G */
5118 /* Ensure that there is a JIT callback if we want to verify that JIT was
5119 actually used. If jit_stack == NULL, no stack has yet been assigned. */
5121 if (verify_jit && jit_stack == NULL && extra != NULL)
5122 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5124 for (;; gmatched++) /* Loop for /g or /G */
5126 markptr = NULL;
5127 jit_was_used = FALSE;
5129 if (timeitm > 0)
5131 register int i;
5132 clock_t time_taken;
5133 clock_t start_time = clock();
5135 #if !defined NODFA
5136 if (all_use_dfa || use_dfa)
5138 if ((options & PCRE_DFA_RESTART) != 0)
5140 fprintf(outfile, "Timing DFA restarts is not supported\n");
5141 break;
5143 if (dfa_workspace == NULL)
5144 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5145 for (i = 0; i < timeitm; i++)
5147 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5148 (options | g_notempty), use_offsets, use_size_offsets,
5149 dfa_workspace, DFA_WS_DIMENSION);
5152 else
5153 #endif
5155 for (i = 0; i < timeitm; i++)
5157 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5158 (options | g_notempty), use_offsets, use_size_offsets);
5160 total_match_time += (time_taken = clock() - start_time);
5161 fprintf(outfile, "Execute time %.4f milliseconds\n",
5162 (((double)time_taken * 1000.0) / (double)timeitm) /
5163 (double)CLOCKS_PER_SEC);
5166 /* If find_match_limit is set, we want to do repeated matches with
5167 varying limits in order to find the minimum value for the match limit and
5168 for the recursion limit. The match limits are relevant only to the normal
5169 running of pcre_exec(), so disable the JIT optimization. This makes it
5170 possible to run the same set of tests with and without JIT externally
5171 requested. */
5173 if (find_match_limit)
5175 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5176 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5177 extra->flags = 0;
5179 (void)check_match_limit(re, extra, bptr, len, start_offset,
5180 options|g_notempty, use_offsets, use_size_offsets,
5181 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5182 PCRE_ERROR_MATCHLIMIT, "match()");
5184 count = check_match_limit(re, extra, bptr, len, start_offset,
5185 options|g_notempty, use_offsets, use_size_offsets,
5186 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5187 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5190 /* If callout_data is set, use the interface with additional data */
5192 else if (callout_data_set)
5194 if (extra == NULL)
5196 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5197 extra->flags = 0;
5199 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5200 extra->callout_data = &callout_data;
5201 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5202 options | g_notempty, use_offsets, use_size_offsets);
5203 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5206 /* The normal case is just to do the match once, with the default
5207 value of match_limit. */
5209 #if !defined NODFA
5210 else if (all_use_dfa || use_dfa)
5212 if (dfa_workspace == NULL)
5213 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5214 if (dfa_matched++ == 0)
5215 dfa_workspace[0] = -1; /* To catch bad restart */
5216 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5217 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5218 DFA_WS_DIMENSION);
5219 if (count == 0)
5221 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5222 count = use_size_offsets/2;
5225 #endif
5227 else
5229 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5230 options | g_notempty, use_offsets, use_size_offsets);
5231 if (count == 0)
5233 fprintf(outfile, "Matched, but too many substrings\n");
5234 /* 2 is a special case; match can be returned */
5235 count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5239 /* Matched */
5241 if (count >= 0)
5243 int i, maxcount;
5244 void *cnptr, *gnptr;
5246 #if !defined NODFA
5247 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5248 #endif
5249 /* 2 is a special case; match can be returned */
5250 maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5252 /* This is a check against a lunatic return value. */
5254 if (count > maxcount)
5256 fprintf(outfile,
5257 "** PCRE error: returned count %d is too big for offset size %d\n",
5258 count, use_size_offsets);
5259 count = use_size_offsets/3;
5260 if (do_g || do_G)
5262 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5263 do_g = do_G = FALSE; /* Break g/G loop */
5267 /* do_allcaps requests showing of all captures in the pattern, to check
5268 unset ones at the end. */
5270 if (do_allcaps)
5272 if (all_use_dfa || use_dfa)
5274 fprintf(outfile, "** Show all captures ignored after DFA matching\n");
5276 else
5278 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5279 goto SKIP_DATA;
5280 count++; /* Allow for full match */
5281 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5285 /* Output the captured substrings. Note that, for the matched string,
5286 the use of \K in an assertion can make the start later than the end. */
5288 for (i = 0; i < count * 2; i += 2)
5290 if (use_offsets[i] < 0)
5292 if (use_offsets[i] != -1)
5293 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5294 use_offsets[i], i);
5295 if (use_offsets[i+1] != -1)
5296 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5297 use_offsets[i+1], i+1);
5298 fprintf(outfile, "%2d: <unset>\n", i/2);
5300 else
5302 int start = use_offsets[i];
5303 int end = use_offsets[i+1];
5305 if (start > end)
5307 start = use_offsets[i+1];
5308 end = use_offsets[i];
5309 fprintf(outfile, "Start of matched string is beyond its end - "
5310 "displaying from end to start.\n");
5313 fprintf(outfile, "%2d: ", i/2);
5314 PCHARSV(bptr, start, end - start, outfile);
5315 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5316 fprintf(outfile, "\n");
5318 /* Note: don't use the start/end variables here because we want to
5319 show the text from what is reported as the end. */
5321 if (do_showcaprest || (i == 0 && do_showrest))
5323 fprintf(outfile, "%2d+ ", i/2);
5324 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5325 outfile);
5326 fprintf(outfile, "\n");
5331 if (markptr != NULL)
5333 fprintf(outfile, "MK: ");
5334 PCHARSV(markptr, 0, -1, outfile);
5335 fprintf(outfile, "\n");
5338 for (i = 0; i < 32; i++)
5340 if ((copystrings & (1U << i)) != 0)
5342 int rc;
5343 char copybuffer[256];
5344 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5345 copybuffer, sizeof(copybuffer));
5346 if (rc < 0)
5347 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5348 else
5350 fprintf(outfile, "%2dC ", i);
5351 PCHARSV(copybuffer, 0, rc, outfile);
5352 fprintf(outfile, " (%d)\n", rc);
5357 cnptr = copynames;
5358 for (;;)
5360 int rc;
5361 char copybuffer[256];
5363 #ifdef SUPPORT_PCRE32
5364 if (pcre_mode == PCRE32_MODE)
5366 if (*(pcre_uint32 *)cnptr == 0) break;
5368 #endif
5369 #ifdef SUPPORT_PCRE16
5370 if (pcre_mode == PCRE16_MODE)
5372 if (*(pcre_uint16 *)cnptr == 0) break;
5374 #endif
5375 #ifdef SUPPORT_PCRE8
5376 if (pcre_mode == PCRE8_MODE)
5378 if (*(pcre_uint8 *)cnptr == 0) break;
5380 #endif
5382 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5383 cnptr, copybuffer, sizeof(copybuffer));
5385 if (rc < 0)
5387 fprintf(outfile, "copy substring ");
5388 PCHARSV(cnptr, 0, -1, outfile);
5389 fprintf(outfile, " failed %d\n", rc);
5391 else
5393 fprintf(outfile, " C ");
5394 PCHARSV(copybuffer, 0, rc, outfile);
5395 fprintf(outfile, " (%d) ", rc);
5396 PCHARSV(cnptr, 0, -1, outfile);
5397 putc('\n', outfile);
5400 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5403 for (i = 0; i < 32; i++)
5405 if ((getstrings & (1U << i)) != 0)
5407 int rc;
5408 const char *substring;
5409 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5410 if (rc < 0)
5411 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5412 else
5414 fprintf(outfile, "%2dG ", i);
5415 PCHARSV(substring, 0, rc, outfile);
5416 fprintf(outfile, " (%d)\n", rc);
5417 PCRE_FREE_SUBSTRING(substring);
5422 gnptr = getnames;
5423 for (;;)
5425 int rc;
5426 const char *substring;
5428 #ifdef SUPPORT_PCRE32
5429 if (pcre_mode == PCRE32_MODE)
5431 if (*(pcre_uint32 *)gnptr == 0) break;
5433 #endif
5434 #ifdef SUPPORT_PCRE16
5435 if (pcre_mode == PCRE16_MODE)
5437 if (*(pcre_uint16 *)gnptr == 0) break;
5439 #endif
5440 #ifdef SUPPORT_PCRE8
5441 if (pcre_mode == PCRE8_MODE)
5443 if (*(pcre_uint8 *)gnptr == 0) break;
5445 #endif
5447 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5448 gnptr, &substring);
5449 if (rc < 0)
5451 fprintf(outfile, "get substring ");
5452 PCHARSV(gnptr, 0, -1, outfile);
5453 fprintf(outfile, " failed %d\n", rc);
5455 else
5457 fprintf(outfile, " G ");
5458 PCHARSV(substring, 0, rc, outfile);
5459 fprintf(outfile, " (%d) ", rc);
5460 PCHARSV(gnptr, 0, -1, outfile);
5461 PCRE_FREE_SUBSTRING(substring);
5462 putc('\n', outfile);
5465 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5468 if (getlist)
5470 int rc;
5471 const char **stringlist;
5472 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5473 if (rc < 0)
5474 fprintf(outfile, "get substring list failed %d\n", rc);
5475 else
5477 for (i = 0; i < count; i++)
5479 fprintf(outfile, "%2dL ", i);
5480 PCHARSV(stringlist[i], 0, -1, outfile);
5481 putc('\n', outfile);
5483 if (stringlist[i] != NULL)
5484 fprintf(outfile, "string list not terminated by NULL\n");
5485 PCRE_FREE_SUBSTRING_LIST(stringlist);
5490 /* There was a partial match. If the bumpalong point is not the same as
5491 the first inspected character, show the offset explicitly. */
5493 else if (count == PCRE_ERROR_PARTIAL)
5495 fprintf(outfile, "Partial match");
5496 if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5497 fprintf(outfile, " at offset %d", use_offsets[2]);
5498 if (markptr != NULL)
5500 fprintf(outfile, ", mark=");
5501 PCHARSV(markptr, 0, -1, outfile);
5503 if (use_size_offsets > 1)
5505 fprintf(outfile, ": ");
5506 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5507 outfile);
5509 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5510 fprintf(outfile, "\n");
5511 break; /* Out of the /g loop */
5514 /* Failed to match. If this is a /g or /G loop and we previously set
5515 g_notempty after a null match, this is not necessarily the end. We want
5516 to advance the start offset, and continue. We won't be at the end of the
5517 string - that was checked before setting g_notempty.
5519 Complication arises in the case when the newline convention is "any",
5520 "crlf", or "anycrlf". If the previous match was at the end of a line
5521 terminated by CRLF, an advance of one character just passes the \r,
5522 whereas we should prefer the longer newline sequence, as does the code in
5523 pcre_exec(). Fudge the offset value to achieve this. We check for a
5524 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5525 find the default.
5527 Otherwise, in the case of UTF-8 matching, the advance must be one
5528 character, not one byte. */
5530 else
5532 if (g_notempty != 0)
5534 int onechar = 1;
5535 unsigned int obits = REAL_PCRE_OPTIONS(re);
5536 use_offsets[0] = start_offset;
5537 if ((obits & PCRE_NEWLINE_BITS) == 0)
5539 int d;
5540 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5541 /* Note that these values are always the ASCII ones, even in
5542 EBCDIC environments. CR = 13, NL = 10. */
5543 obits = (d == 13)? PCRE_NEWLINE_CR :
5544 (d == 10)? PCRE_NEWLINE_LF :
5545 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5546 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5547 (d == -1)? PCRE_NEWLINE_ANY : 0;
5549 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5550 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5551 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5553 start_offset < len - 1 && (
5554 #ifdef SUPPORT_PCRE8
5555 (pcre_mode == PCRE8_MODE &&
5556 bptr[start_offset] == '\r' &&
5557 bptr[start_offset + 1] == '\n') ||
5558 #endif
5559 #ifdef SUPPORT_PCRE16
5560 (pcre_mode == PCRE16_MODE &&
5561 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5562 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5563 #endif
5564 #ifdef SUPPORT_PCRE32
5565 (pcre_mode == PCRE32_MODE &&
5566 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5567 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5568 #endif
5570 onechar++;
5571 else if (use_utf)
5573 while (start_offset + onechar < len)
5575 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5576 onechar++;
5579 use_offsets[1] = start_offset + onechar;
5581 else
5583 switch(count)
5585 case PCRE_ERROR_NOMATCH:
5586 if (gmatched == 0)
5588 if (markptr == NULL)
5590 fprintf(outfile, "No match");
5592 else
5594 fprintf(outfile, "No match, mark = ");
5595 PCHARSV(markptr, 0, -1, outfile);
5597 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5598 putc('\n', outfile);
5600 break;
5602 case PCRE_ERROR_BADUTF8:
5603 case PCRE_ERROR_SHORTUTF8:
5604 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5605 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5606 8 * CHAR_SIZE);
5607 if (use_size_offsets >= 2)
5608 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5609 use_offsets[1]);
5610 fprintf(outfile, "\n");
5611 break;
5613 case PCRE_ERROR_BADUTF8_OFFSET:
5614 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5615 8 * CHAR_SIZE);
5616 break;
5618 default:
5619 if (count < 0 &&
5620 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5621 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5622 else
5623 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5624 break;
5627 break; /* Out of the /g loop */
5631 /* If not /g or /G we are done */
5633 if (!do_g && !do_G) break;
5635 if (use_offsets == NULL)
5637 fprintf(outfile, "Cannot do global matching without an ovector\n");
5638 break;
5641 if (use_size_offsets < 2)
5643 fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
5644 break;
5647 /* If we have matched an empty string, first check to see if we are at
5648 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5649 Perl's /g option does. This turns out to be rather cunning. First we set
5650 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5651 same point. If this fails (picked up above) we advance to the next
5652 character. */
5654 g_notempty = 0;
5656 if (use_offsets[0] == use_offsets[1])
5658 if (use_offsets[0] == len) break;
5659 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5662 /* For /g, update the start offset, leaving the rest alone. There is a
5663 tricky case when \K is used in a positive lookbehind assertion. This can
5664 cause the end of the match to be less than or equal to the start offset.
5665 In this case we restart at one past the start offset. This may return the
5666 same match if the original start offset was bumped along during the
5667 match, but eventually the new start offset will hit the actual start
5668 offset. (In PCRE2 the true start offset is available, and this can be
5669 done better. It is not worth doing more than making sure we do not loop
5670 at this stage in the life of PCRE1.) */
5672 if (do_g)
5674 if (g_notempty == 0 && use_offsets[1] <= start_offset)
5676 if (start_offset >= len) break; /* End of subject */
5677 start_offset++;
5678 if (use_utf)
5680 while (start_offset < len)
5682 if ((bptr[start_offset] & 0xc0) != 0x80) break;
5683 start_offset++;
5687 else start_offset = use_offsets[1];
5690 /* For /G, update the pointer and length */
5692 else
5694 bptr += use_offsets[1] * CHAR_SIZE;
5695 len -= use_offsets[1];
5697 } /* End of loop for /g and /G */
5699 NEXT_DATA: continue;
5700 } /* End of loop for data lines */
5702 CONTINUE:
5704 #if !defined NOPOSIX
5705 if ((posix || do_posix)) regfree(&preg);
5706 #endif
5708 if (re != NULL) new_free(re);
5709 if (extra != NULL)
5711 PCRE_FREE_STUDY(extra);
5713 if (locale_set)
5715 new_free((void *)tables);
5716 setlocale(LC_CTYPE, "C");
5717 locale_set = 0;
5719 if (jit_stack != NULL)
5721 PCRE_JIT_STACK_FREE(jit_stack);
5722 jit_stack = NULL;
5726 if (infile == stdin) fprintf(outfile, "\n");
5728 if (showtotaltimes)
5730 fprintf(outfile, "--------------------------------------\n");
5731 if (timeit > 0)
5733 fprintf(outfile, "Total compile time %.4f milliseconds\n",
5734 (((double)total_compile_time * 1000.0) / (double)timeit) /
5735 (double)CLOCKS_PER_SEC);
5736 fprintf(outfile, "Total study time %.4f milliseconds\n",
5737 (((double)total_study_time * 1000.0) / (double)timeit) /
5738 (double)CLOCKS_PER_SEC);
5740 fprintf(outfile, "Total execute time %.4f milliseconds\n",
5741 (((double)total_match_time * 1000.0) / (double)timeitm) /
5742 (double)CLOCKS_PER_SEC);
5745 EXIT:
5747 if (infile != NULL && infile != stdin) fclose(infile);
5748 if (outfile != NULL && outfile != stdout) fclose(outfile);
5750 free(buffer);
5751 free(dbuffer);
5752 free(pbuffer);
5753 free(offsets);
5755 #ifdef SUPPORT_PCRE16
5756 if (buffer16 != NULL) free(buffer16);
5757 #endif
5758 #ifdef SUPPORT_PCRE32
5759 if (buffer32 != NULL) free(buffer32);
5760 #endif
5762 #if !defined NODFA
5763 if (dfa_workspace != NULL)
5764 free(dfa_workspace);
5765 #endif
5767 #if defined(__VMS)
5768 yield = SS$_NORMAL; /* Return values via DCL symbols */
5769 #endif
5771 return yield;
5774 /* End of pcretest.c */