1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
/* Pull in the line-editing headers. GNU readline also needs its history
header; for libedit, prefer editline/readline.h when the configuration found
it, otherwise fall back to readline/readline.h. */

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
#if defined(SUPPORT_LIBREADLINE)
#include <readline/readline.h>
#include <readline/history.h>
#else
#if defined(HAVE_EDITLINE_READLINE_H)
#include <editline/readline.h>
#else
#include <readline/readline.h>
#endif
#endif
#endif
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
/* Select the fopen() modes for the input and output files, and supply the
Windows spellings of isatty(), fileno() and _setmode() where needed. Exactly
one pair of INPUT_MODE/OUTPUT_MODE definitions is active in any build. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to   */
                               /* be already defined, hence the #ifndefs.    */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
#endif
/* In this program PRIV() expands to the bare identifier it is given. */

#define PRIV(name) name
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
136 #include "pcre_internal.h"
138 /* The pcre_printint() function, which prints the internal form of a compiled
139 regex, is held in a separate file so that (a) it can be compiled in either
140 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
141 when that is compiled in debug mode. */
/* Prototypes for the 8-, 16- and 32-bit builds of the printint function. Each
one is declared only when the corresponding library is supported, and each
conditional is closed so that the declarations do not nest inside each other. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
#ifdef SUPPORT_PCRE32
void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
153 /* We need access to some of the data tables that PCRE uses. So as not to have
154 to keep two copies, we include the source files here, changing the names of the
155 external symbols to prevent clashes. */
157 #define PCRE_INCLUDED
159 #include "pcre_tables.c"
160 #include "pcre_ucd.c"
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
the same as in the printint.src file. We use it here in cases when the locale
has not been explicitly changed, so as to get consistent output from systems
that differ in their output from isprint() even in the "C" locale. */
/* PRINTABLE(c) is the locale-independent test. Only one definition may be
active: the 64..254 range applies to EBCDIC builds and the 32..126 range to
everything else. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) defers to isprint() once locale_set is non-zero, and otherwise
uses the fixed PRINTABLE() range. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
/* Posix support is disabled in 16 or 32 bit only mode. */
#if !defined SUPPORT_PCRE8 && !defined NOPOSIX
#define NOPOSIX
#endif

/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
Makefile. */

#if !defined NOPOSIX
#include "pcreposix.h"
#endif
189 /* It is also possible, originally for the benefit of a version that was
190 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
191 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
192 automatically cut out the UTF support if PCRE is built without it. */
200 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
201 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
202 only from one place and is handled differently). I couldn't dream up any way of
203 using a single macro to do this in a generic way, because of the many different
204 argument requirements. We know that at least one of SUPPORT_PCRE8 and
205 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
206 use these in the definitions of generic macros.
208 **** Special note about the PCHARSxxx macros: the address of the string to be
209 printed is always given as two arguments: a base address followed by an offset.
210 The base address is cast to the correct data size for 8 or 16 bit data; the
211 offset is in units of this size. If the string were given as base+offset in one
212 argument, the casting might be incorrectly applied. */
/* ----- 8-bit mode wrappers -----

Each macro has the same parameter list as its 16-bit and 32-bit counterparts,
so that the generic macros can pass one argument list to whichever variant is
selected; arguments a variant does not need are simply not used. Macros that
produce a result assign it to their first parameter.

Note that PCRE_GET_STRINGNUMBER8 refers to a variable named "re" in the
calling scope; its "rc" parameter does not appear in the expansion. */

#ifdef SUPPORT_PCRE8

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

/* Lets the two-mode macros build the name pcre8_maketables by token pasting. */

#define pcre8_maketables pcre_maketables

#endif /* SUPPORT_PCRE8 */
296 /* -----------------------------------------------------------*/
/* ----- 16-bit mode wrappers -----

These mirror the 8-bit wrappers, casting between the generic types used by the
callers (pcre, pcre_extra, pcre_jit_stack) and the pcre16 types. The "size"
argument of the two copy macros is divided by 2 before being passed on as the
length of the PCRE_UCHAR16 buffer.

PCRE_GET_STRINGNUMBER16 refers to a variable named "re" in the calling scope;
its "rc" parameter does not appear in the expansion. */

#ifdef SUPPORT_PCRE16

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

/* The call must end with the tables argument; without it the macro runs on
into the next #define. */

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

/* As with PCRE_COMPILE16, the call must end with the tables argument. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)

#endif /* SUPPORT_PCRE16 */
385 /* -----------------------------------------------------------*/
387 #ifdef SUPPORT_PCRE32
389 #define PCHARS32(lv, p, offset, len, f) \
390 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
392 #define PCHARSV32(p, offset, len, f) \
393 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
395 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396 p = read_capture_name32(p, cn32, re)
398 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
400 #define SET_PCRE_CALLOUT32(callout) \
401 pcre32_callout = (int (*)(pcre32_callout_block *))callout
403 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
404 pcre32_assign_jit_stack((pcre32_extra *)extra, \
405 (pcre32_jit_callback)callback, userdata)
407 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
408 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
/* Copy a captured substring (by name or by number) into a caller-supplied
buffer, 32-bit mode. "size" is the buffer size in bytes, as for the 8-bit and
16-bit variants, so it is divided by 4 to give the capacity in PCRE_UCHAR32
units. Dividing by 2 (the 16-bit factor) would report twice the real capacity
and let the library write past the end of the buffer. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4)

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, size/4)
420 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
421 offsets, size_offsets, workspace, size_workspace) \
422 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
423 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
424 workspace, size_workspace)
426 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427 offsets, size_offsets) \
428 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
429 len, start_offset, options, offsets, size_offsets)
431 #define PCRE_FREE_STUDY32(extra) \
432 pcre32_free_study((pcre32_extra *)extra)
434 #define PCRE_FREE_SUBSTRING32(substring) \
435 pcre32_free_substring((PCRE_SPTR32)substring)
437 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
438 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
440 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
441 getnamesptr, subsptr) \
442 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
443 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
445 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
446 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
448 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
449 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
450 (PCRE_SPTR32 *)(void*)subsptr)
452 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
453 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
454 (PCRE_SPTR32 **)(void*)listptr)
456 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
457 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
460 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
461 pcre32_printint(re, outfile, debug_lengths)
463 #define PCRE_STUDY32(extra, re, options, error) \
464 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
466 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
467 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
469 #define PCRE_JIT_STACK_FREE32(stack) \
470 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
472 #endif /* SUPPORT_PCRE32 */
475 /* ----- More than one mode is supported; a runtime test is needed, except for
476 pcre_config(), and the JIT stack functions, when it doesn't matter which
477 available version is called. ----- */
485 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486 defined (SUPPORT_PCRE32)) >= 2
488 #define CHAR_SIZE (1 << pcre_mode)
490 /* There doesn't seem to be an easy way of writing these macros that can cope
491 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
494 /* ----- All three modes supported ----- */
496 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
/* Generic macros for the case where all three libraries are available. Each
one tests pcre_mode at run time and expands to exactly one of the 32-, 16- or
8-bit wrappers.

The statement-form macros are complete if / else if / else chains: the final
"else" is essential, because without it the 8-bit wrapper would be executed
unconditionally after the 32- or 16-bit one. The value-form macros (STRLEN,
PCRE_JIT_STACK_ALLOC, PCRE_MAKETABLES) are fully parenthesized conditional
expressions. */

#define PCHARS(lv, p, offset, len, f) \
  if (pcre_mode == PCRE32_MODE) \
    PCHARS32(lv, p, offset, len, f); \
  else if (pcre_mode == PCRE16_MODE) \
    PCHARS16(lv, p, offset, len, f); \
  else \
    PCHARS8(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (pcre_mode == PCRE32_MODE) \
    PCHARSV32(p, offset, len, f); \
  else if (pcre_mode == PCRE16_MODE) \
    PCHARSV16(p, offset, len, f); \
  else \
    PCHARSV8(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  if (pcre_mode == PCRE32_MODE) \
    READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
  else if (pcre_mode == PCRE16_MODE) \
    READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
  else \
    READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)

#define SET_PCRE_CALLOUT(callout) \
  if (pcre_mode == PCRE32_MODE) \
    SET_PCRE_CALLOUT32(callout); \
  else if (pcre_mode == PCRE16_MODE) \
    SET_PCRE_CALLOUT16(callout); \
  else \
    SET_PCRE_CALLOUT8(callout)

#define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
  else \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_STUDY32(extra); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_STUDY16(extra); \
  else \
    PCRE_FREE_STUDY8(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_SUBSTRING32(substring); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_SUBSTRING16(substring); \
  else \
    PCRE_FREE_SUBSTRING8(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_SUBSTRING_LIST32(listptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_SUBSTRING_LIST16(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST8(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
  else \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE32_MODE ? \
     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
    : pcre_mode == PCRE16_MODE ? \
      PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
      : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_JIT_STACK_FREE32(stack); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_JIT_STACK_FREE16(stack); \
  else \
    PCRE_JIT_STACK_FREE8(stack)

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_PRINTINT32(re, outfile, debug_lengths); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_PRINTINT16(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT8(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_STUDY32(extra, re, options, error); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_STUDY16(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)
697 /* ----- Two out of three modes are supported ----- */
701 /* We can use some macro trickery to make a single set of definitions work in
702 the three different cases. */
704 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
706 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
710 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
712 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
716 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
723 #define glue(a,b) a##b
724 #define G(a,b) glue(a,b)
/* ----- Common macros for two-mode cases -----

Each macro compares pcre_mode with the BITONE mode and expands to the BITONE
wrapper when they match, or to the BITTWO wrapper otherwise. The "else" in the
statement-form macros is essential: without it the BITTWO wrapper would always
be executed. The value-form macros (STRLEN, PCRE_JIT_STACK_ALLOC,
PCRE_MAKETABLES) are wrapped in an outer pair of parentheses so that they can
be combined safely with other operators at the point of use. */

#define PCHARS(lv, p, offset, len, f) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCHARS,BITONE)(lv, p, offset, len, f); \
  else \
    G(PCHARS,BITTWO)(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCHARSV,BITONE)(p, offset, len, f); \
  else \
    G(PCHARSV,BITTWO)(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
  else \
    G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)

#define SET_PCRE_CALLOUT(callout) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(SET_PCRE_CALLOUT,BITONE)(callout); \
  else \
    G(SET_PCRE_CALLOUT,BITTWO)(callout)

#define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
  G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
  else \
    G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
  else \
    G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)

#define PCRE_CONFIG G(G(pcre,BITONE),_config)

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_STUDY,BITONE)(extra); \
  else \
    G(PCRE_FREE_STUDY,BITTWO)(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
  else \
    G(PCRE_FREE_SUBSTRING,BITTWO)(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
  else \
    G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
  else \
    G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
  else \
    G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
  else \
    G(PCRE_JIT_STACK_FREE,BITTWO)(stack)

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
  else \
    G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
  else \
    G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
    G(PCRE_STUDY,BITONE)(extra, re, options, error); \
  else \
    G(PCRE_STUDY,BITTWO)(extra, re, options, error)
881 #endif /* Two out of three modes */
883 /* ----- End of cases where more than one mode is supported ----- */
886 /* ----- Only 8-bit mode is supported ----- */
888 #elif defined SUPPORT_PCRE8
890 #define PCHARS PCHARS8
891 #define PCHARSV PCHARSV8
892 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
893 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
894 #define STRLEN STRLEN8
895 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
896 #define PCRE_COMPILE PCRE_COMPILE8
897 #define PCRE_CONFIG pcre_config
898 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
899 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
900 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
901 #define PCRE_EXEC PCRE_EXEC8
902 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
903 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
904 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
905 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
906 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
907 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
908 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
909 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
910 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
911 #define PCRE_MAKETABLES pcre_maketables()
912 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
913 #define PCRE_PRINTINT PCRE_PRINTINT8
914 #define PCRE_STUDY PCRE_STUDY8
916 /* ----- Only 16-bit mode is supported ----- */
918 #elif defined SUPPORT_PCRE16
920 #define PCHARS PCHARS16
921 #define PCHARSV PCHARSV16
922 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
923 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
924 #define STRLEN STRLEN16
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
926 #define PCRE_COMPILE PCRE_COMPILE16
927 #define PCRE_CONFIG pcre16_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
931 #define PCRE_EXEC PCRE_EXEC16
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
941 #define PCRE_MAKETABLES pcre16_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
943 #define PCRE_PRINTINT PCRE_PRINTINT16
944 #define PCRE_STUDY PCRE_STUDY16
946 /* ----- Only 32-bit mode is supported ----- */
948 #elif defined SUPPORT_PCRE32
950 #define PCHARS PCHARS32
951 #define PCHARSV PCHARSV32
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
954 #define STRLEN STRLEN32
955 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
956 #define PCRE_COMPILE PCRE_COMPILE32
957 #define PCRE_CONFIG pcre32_config
958 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
959 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
960 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
961 #define PCRE_EXEC PCRE_EXEC32
962 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
963 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
964 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
965 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
966 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
967 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
968 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
969 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
970 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
971 #define PCRE_MAKETABLES pcre32_maketables()
972 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
973 #define PCRE_PRINTINT PCRE_PRINTINT32
974 #define PCRE_STUDY PCRE_STUDY32
978 /* ----- End of mode-specific function call macros ----- */
981 /* Other parameters */
983 #ifndef CLOCKS_PER_SEC
985 #define CLOCKS_PER_SEC CLK_TCK
987 #define CLOCKS_PER_SEC 100
992 #define DFA_WS_DIMENSION 1000
995 /* This is the default loop count for timing. */
997 #define LOOPREPEAT 500000
999 /* Static variables */
1001 static FILE *outfile
;
1002 static int log_store
= 0;
1003 static int callout_count
;
1004 static int callout_extra
;
1005 static int callout_fail_count
;
1006 static int callout_fail_id
;
1007 static int debug_lengths
;
1008 static int first_callout
;
1009 static int jit_was_used
;
1010 static int locale_set
= 0;
1011 static int show_malloc
;
1013 static size_t gotten_store
;
1014 static size_t first_gotten_store
= 0;
1015 static const unsigned char *last_callout_mark
= NULL
;
1017 /* The buffers grow automatically if very long input lines are encountered. */
1019 static int buffer_size
= 50000;
1020 static pcre_uint8
*buffer
= NULL
;
1021 static pcre_uint8
*pbuffer
= NULL
;
1023 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1025 #ifdef COMPILE_PCRE16
1026 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1029 #ifdef COMPILE_PCRE32
1030 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1033 /* We need buffers for building 16/32-bit strings, and the tables of operator
1034 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035 pattern for saving/reloading testing. Luckily, the data for these tables is
1036 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038 LINK_SIZE is also used later in this program. */
1040 #ifdef SUPPORT_PCRE16
1047 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1051 #error LINK_SIZE must be either 2, 3, or 4
1054 static int buffer16_size
= 0;
1055 static pcre_uint16
*buffer16
= NULL
;
1056 static const pcre_uint16 OP_lengths16
[] = { OP_LENGTHS
};
1057 #endif /* SUPPORT_PCRE16 */
1059 #ifdef SUPPORT_PCRE32
1065 static int buffer32_size
= 0;
1066 static pcre_uint32
*buffer32
= NULL
;
1067 static const pcre_uint32 OP_lengths32
[] = { OP_LENGTHS
};
1068 #endif /* SUPPORT_PCRE32 */
1070 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1071 support, it can be changed by an option. If there is no 8-bit support, there
1072 must be 16- or 32-bit support, so default to 16-bit mode if it is available, and otherwise to 32-bit mode. */
1074 #if defined SUPPORT_PCRE8
1075 static int pcre_mode
= PCRE8_MODE
;
1076 #elif defined SUPPORT_PCRE16
1077 static int pcre_mode
= PCRE16_MODE
;
1078 #elif defined SUPPORT_PCRE32
1079 static int pcre_mode
= PCRE32_MODE
;
1082 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1084 static int jit_study_bits
[] =
1086 PCRE_STUDY_JIT_COMPILE
,
1087 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
,
1088 PCRE_STUDY_JIT_COMPILE
+ PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
,
1089 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
,
1090 PCRE_STUDY_JIT_COMPILE
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
,
1091 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
,
1092 PCRE_STUDY_JIT_COMPILE
+ PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
+
1093 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1096 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1097 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1099 /* Textual explanations for runtime error codes */
1101 static const char *errtexts
[] = {
1102 NULL
, /* 0 is no error */
1103 NULL
, /* NOMATCH is handled specially */
1104 "NULL argument passed",
1106 "magic number missing",
1107 "unknown opcode - pattern overwritten?",
1109 NULL
, /* never returned by pcre_exec() or pcre_dfa_exec() */
1110 "match limit exceeded",
1111 "callout error code",
1112 NULL
, /* BADUTF8/16 is handled specially */
1113 NULL
, /* BADUTF8/16 offset is handled specially */
1114 NULL
, /* PARTIAL is handled specially */
1115 "not used - internal error",
1116 "internal error - pattern overwritten?",
1118 "item unsupported for DFA matching",
1119 "backreference condition or recursion test not supported for DFA matching",
1120 "match limit not supported for DFA matching",
1121 "workspace size exceeded in DFA matching",
1122 "too much recursion for DFA matching",
1123 "recursion limit exceeded",
1124 "not used - internal error",
1125 "invalid combination of newline options",
1127 NULL
, /* SHORTUTF8/16 is handled specially */
1128 "nested recursion at the same subject position",
1129 "JIT stack limit reached",
1130 "pattern compiled in wrong mode: 8-bit/16-bit error",
1131 "pattern compiled with other endianness",
1132 "invalid data in workspace for DFA restart",
1138 /*************************************************
1139 * Alternate character tables *
1140 *************************************************/
1142 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1143 using the default tables of the library. However, the T option can be used to
1144 select alternate sets of tables, for different kinds of testing. Note also that
1145 the L (locale) option also adjusts the tables. */
1147 /* This is the set of tables distributed as default with PCRE. It recognizes
1148 only ASCII characters. */
1150 static const pcre_uint8 tables0
[] = {
1152 /* This table is a lower casing table. */
1154 0, 1, 2, 3, 4, 5, 6, 7,
1155 8, 9, 10, 11, 12, 13, 14, 15,
1156 16, 17, 18, 19, 20, 21, 22, 23,
1157 24, 25, 26, 27, 28, 29, 30, 31,
1158 32, 33, 34, 35, 36, 37, 38, 39,
1159 40, 41, 42, 43, 44, 45, 46, 47,
1160 48, 49, 50, 51, 52, 53, 54, 55,
1161 56, 57, 58, 59, 60, 61, 62, 63,
1162 64, 97, 98, 99,100,101,102,103,
1163 104,105,106,107,108,109,110,111,
1164 112,113,114,115,116,117,118,119,
1165 120,121,122, 91, 92, 93, 94, 95,
1166 96, 97, 98, 99,100,101,102,103,
1167 104,105,106,107,108,109,110,111,
1168 112,113,114,115,116,117,118,119,
1169 120,121,122,123,124,125,126,127,
1170 128,129,130,131,132,133,134,135,
1171 136,137,138,139,140,141,142,143,
1172 144,145,146,147,148,149,150,151,
1173 152,153,154,155,156,157,158,159,
1174 160,161,162,163,164,165,166,167,
1175 168,169,170,171,172,173,174,175,
1176 176,177,178,179,180,181,182,183,
1177 184,185,186,187,188,189,190,191,
1178 192,193,194,195,196,197,198,199,
1179 200,201,202,203,204,205,206,207,
1180 208,209,210,211,212,213,214,215,
1181 216,217,218,219,220,221,222,223,
1182 224,225,226,227,228,229,230,231,
1183 232,233,234,235,236,237,238,239,
1184 240,241,242,243,244,245,246,247,
1185 248,249,250,251,252,253,254,255,
1187 /* This table is a case flipping table. */
1189 0, 1, 2, 3, 4, 5, 6, 7,
1190 8, 9, 10, 11, 12, 13, 14, 15,
1191 16, 17, 18, 19, 20, 21, 22, 23,
1192 24, 25, 26, 27, 28, 29, 30, 31,
1193 32, 33, 34, 35, 36, 37, 38, 39,
1194 40, 41, 42, 43, 44, 45, 46, 47,
1195 48, 49, 50, 51, 52, 53, 54, 55,
1196 56, 57, 58, 59, 60, 61, 62, 63,
1197 64, 97, 98, 99,100,101,102,103,
1198 104,105,106,107,108,109,110,111,
1199 112,113,114,115,116,117,118,119,
1200 120,121,122, 91, 92, 93, 94, 95,
1201 96, 65, 66, 67, 68, 69, 70, 71,
1202 72, 73, 74, 75, 76, 77, 78, 79,
1203 80, 81, 82, 83, 84, 85, 86, 87,
1204 88, 89, 90,123,124,125,126,127,
1205 128,129,130,131,132,133,134,135,
1206 136,137,138,139,140,141,142,143,
1207 144,145,146,147,148,149,150,151,
1208 152,153,154,155,156,157,158,159,
1209 160,161,162,163,164,165,166,167,
1210 168,169,170,171,172,173,174,175,
1211 176,177,178,179,180,181,182,183,
1212 184,185,186,187,188,189,190,191,
1213 192,193,194,195,196,197,198,199,
1214 200,201,202,203,204,205,206,207,
1215 208,209,210,211,212,213,214,215,
1216 216,217,218,219,220,221,222,223,
1217 224,225,226,227,228,229,230,231,
1218 232,233,234,235,236,237,238,239,
1219 240,241,242,243,244,245,246,247,
1220 248,249,250,251,252,253,254,255,
1222 /* This table contains bit maps for various character classes. Each map is 32
1223 bytes long and the bits run from the least significant end of each byte. The
1224 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1225 graph, print, punct, and cntrl. Other classes are built from combinations. */
1227 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1228 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1229 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1230 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1232 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1233 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1234 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1237 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1238 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1239 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1240 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1243 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1244 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1248 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1249 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1252 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1253 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1255 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1257 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1258 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1263 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1264 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1268 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1274 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277 /* This table identifies various classes of character by individual bits:
1278 0x01 white space character
1281 0x08 hexadecimal digit
1282 0x10 alphanumeric or '_'
1283 0x80 regular expression metacharacter or binary zero
1286 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1287 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1288 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1289 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1290 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1291 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1292 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1293 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1294 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1295 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1296 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1297 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1298 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1299 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1300 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1301 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1311 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1316 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1319 /* This is a set of tables that came originally from a Windows user. It seems to
1320 be at least an approximation of ISO 8859. In particular, there are characters
1321 greater than 128 that are marked as spaces, letters, etc. */
1323 static const pcre_uint8 tables1
[] = {
1325 8,9,10,11,12,13,14,15,
1326 16,17,18,19,20,21,22,23,
1327 24,25,26,27,28,29,30,31,
1328 32,33,34,35,36,37,38,39,
1329 40,41,42,43,44,45,46,47,
1330 48,49,50,51,52,53,54,55,
1331 56,57,58,59,60,61,62,63,
1332 64,97,98,99,100,101,102,103,
1333 104,105,106,107,108,109,110,111,
1334 112,113,114,115,116,117,118,119,
1335 120,121,122,91,92,93,94,95,
1336 96,97,98,99,100,101,102,103,
1337 104,105,106,107,108,109,110,111,
1338 112,113,114,115,116,117,118,119,
1339 120,121,122,123,124,125,126,127,
1340 128,129,130,131,132,133,134,135,
1341 136,137,138,139,140,141,142,143,
1342 144,145,146,147,148,149,150,151,
1343 152,153,154,155,156,157,158,159,
1344 160,161,162,163,164,165,166,167,
1345 168,169,170,171,172,173,174,175,
1346 176,177,178,179,180,181,182,183,
1347 184,185,186,187,188,189,190,191,
1348 224,225,226,227,228,229,230,231,
1349 232,233,234,235,236,237,238,239,
1350 240,241,242,243,244,245,246,215,
1351 248,249,250,251,252,253,254,223,
1352 224,225,226,227,228,229,230,231,
1353 232,233,234,235,236,237,238,239,
1354 240,241,242,243,244,245,246,247,
1355 248,249,250,251,252,253,254,255,
1357 8,9,10,11,12,13,14,15,
1358 16,17,18,19,20,21,22,23,
1359 24,25,26,27,28,29,30,31,
1360 32,33,34,35,36,37,38,39,
1361 40,41,42,43,44,45,46,47,
1362 48,49,50,51,52,53,54,55,
1363 56,57,58,59,60,61,62,63,
1364 64,97,98,99,100,101,102,103,
1365 104,105,106,107,108,109,110,111,
1366 112,113,114,115,116,117,118,119,
1367 120,121,122,91,92,93,94,95,
1368 96,65,66,67,68,69,70,71,
1369 72,73,74,75,76,77,78,79,
1370 80,81,82,83,84,85,86,87,
1371 88,89,90,123,124,125,126,127,
1372 128,129,130,131,132,133,134,135,
1373 136,137,138,139,140,141,142,143,
1374 144,145,146,147,148,149,150,151,
1375 152,153,154,155,156,157,158,159,
1376 160,161,162,163,164,165,166,167,
1377 168,169,170,171,172,173,174,175,
1378 176,177,178,179,180,181,182,183,
1379 184,185,186,187,188,189,190,191,
1380 224,225,226,227,228,229,230,231,
1381 232,233,234,235,236,237,238,239,
1382 240,241,242,243,244,245,246,215,
1383 248,249,250,251,252,253,254,223,
1384 192,193,194,195,196,197,198,199,
1385 200,201,202,203,204,205,206,207,
1386 208,209,210,211,212,213,214,247,
1387 216,217,218,219,220,221,222,255,
1393 126,0,0,0,126,0,0,0,
1401 254,255,255,7,0,0,0,0,
1403 255,255,127,127,0,0,0,0,
1405 0,0,0,0,254,255,255,7,
1407 0,0,0,128,255,255,127,255,
1409 254,255,255,135,254,255,255,7,
1411 255,255,127,255,255,255,127,255,
1412 0,0,0,0,254,255,255,255,
1413 255,255,255,255,255,255,255,127,
1414 0,0,0,0,254,255,255,255,
1415 255,255,255,255,255,255,255,255,
1416 0,2,0,0,255,255,255,255,
1417 255,255,255,255,255,255,255,127,
1418 0,0,0,0,255,255,255,255,
1419 255,255,255,255,255,255,255,255,
1420 0,0,0,0,254,255,0,252,
1421 1,0,0,248,1,0,0,120,
1422 0,0,0,0,254,255,255,255,
1423 0,0,128,0,0,0,128,0,
1424 255,255,255,255,0,0,0,0,
1426 255,255,255,255,0,0,0,0,
1433 128,128,128,128,0,0,128,0,
1434 28,28,28,28,28,28,28,28,
1435 28,28,0,0,0,0,0,128,
1436 0,26,26,26,26,26,26,18,
1437 18,18,18,18,18,18,18,18,
1438 18,18,18,18,18,18,18,18,
1439 18,18,18,128,128,0,128,16,
1440 0,26,26,26,26,26,26,18,
1441 18,18,18,18,18,18,18,18,
1442 18,18,18,18,18,18,18,18,
1443 18,18,18,128,128,0,0,0,
1452 18,18,18,18,18,18,18,18,
1453 18,18,18,18,18,18,18,18,
1454 18,18,18,18,18,18,18,0,
1455 18,18,18,18,18,18,18,18,
1456 18,18,18,18,18,18,18,18,
1457 18,18,18,18,18,18,18,18,
1458 18,18,18,18,18,18,18,0,
1459 18,18,18,18,18,18,18,18
1465 #ifndef HAVE_STRERROR
1466 /*************************************************
1467 * Provide strerror() for non-ANSI libraries *
1468 *************************************************/
1470 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1471 in their libraries, but can provide the same facility by this simple
1472 alternative function. */
1474 extern int sys_nerr
;
1475 extern char *sys_errlist
[];
1480 if (n
< 0 || n
>= sys_nerr
) return "unknown error number";
1481 return sys_errlist
[n
];
1483 #endif /* HAVE_STRERROR */
1487 /*************************************************
1488 * Print newline configuration *
1489 *************************************************/
1493 rc the return code from PCRE_CONFIG_NEWLINE
1494 isc TRUE if called from "-C newline"
1499 print_newline_config(int rc
, BOOL isc
)
1501 const char *s
= NULL
;
1502 if (!isc
) printf(" Newline sequence is ");
1505 case CHAR_CR
: s
= "CR"; break;
1506 case CHAR_LF
: s
= "LF"; break;
1507 case (CHAR_CR
<<8 | CHAR_LF
): s
= "CRLF"; break;
1508 case -1: s
= "ANY"; break;
1509 case -2: s
= "ANYCRLF"; break;
1512 printf("a non-standard value: 0x%04x\n", rc
);
1521 /*************************************************
1522 * JIT memory callback *
1523 *************************************************/
/* JIT memory callback. Sets the file-level flag jit_was_used to TRUE to record
that the callback ran, then returns its argument converted to a pcre_jit_stack
pointer.

Argument:   arg   opaque pointer, returned unchanged apart from the cast
Returns:    arg as a pcre_jit_stack pointer
*/
1525 static pcre_jit_stack
* jit_callback(void *arg
)
1527 jit_was_used
= TRUE
;
1528 return (pcre_jit_stack
*)arg
;
1532 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1533 /*************************************************
1534 * Convert UTF-8 string to value *
1535 *************************************************/
1537 /* This function takes one or more bytes that represents a UTF-8 character,
1538 and returns the value of the character.
1541 utf8bytes a pointer to the byte vector
1542 vptr a pointer to an int to receive the value
1544 Returns: > 0 => the number of bytes consumed
1545 -6 to 0 => malformed UTF-8 character at offset = (-return)
1549 utf82ord(pcre_uint8
*utf8bytes
, pcre_uint32
*vptr
)
1551 pcre_uint32 c
= *utf8bytes
++;
1555 for (i
= -1; i
< 6; i
++) /* i is number of additional bytes */
1557 if ((d
& 0x80) == 0) break;
1561 if (i
== -1) { *vptr
= c
; return 1; } /* ascii character */
1562 if (i
== 0 || i
== 6) return 0; /* invalid UTF-8 */
1564 /* i now has a value in the range 1-5 */
1567 d
= (c
& utf8_table3
[i
]) << s
;
1569 for (j
= 0; j
< i
; j
++)
1572 if ((c
& 0xc0) != 0x80) return -(j
+1);
1574 d
|= (c
& 0x3f) << s
;
1577 /* Check that encoding was the correct unique one */
1579 for (j
= 0; j
< utf8_table1_size
; j
++)
1580 if (d
<= (pcre_uint32
)utf8_table1
[j
]) break;
1581 if (j
!= i
) return -(i
+1);
1588 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1592 #if defined SUPPORT_PCRE8 && !defined NOUTF
1593 /*************************************************
1594 * Convert character value to UTF-8 *
1595 *************************************************/
1597 /* This function takes an integer value in the range 0 - 0x7fffffff
1598 and encodes it as a UTF-8 character in 0 to 6 bytes.
1601 cvalue the character value
1602 utf8bytes pointer to buffer for result - at least 6 bytes long
1604 Returns: number of characters placed in the buffer
1608 ord2utf8(pcre_uint32 cvalue
, pcre_uint8
*utf8bytes
)
1611 if (cvalue
> 0x7fffffffu
)
1613 for (i
= 0; i
< utf8_table1_size
; i
++)
1614 if (cvalue
<= (pcre_uint32
)utf8_table1
[i
]) break;
1616 for (j
= i
; j
> 0; j
--)
1618 *utf8bytes
-- = 0x80 | (cvalue
& 0x3f);
1621 *utf8bytes
= utf8_table2
[i
] | cvalue
;
1627 #ifdef SUPPORT_PCRE16
1628 /*************************************************
1629 * Convert a string to 16-bit *
1630 *************************************************/
1632 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1633 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1634 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1635 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1636 result is always left in buffer16.
1638 Note that this function does not object to surrogate values. This is
1639 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1640 for the purpose of testing that they are correctly faulted.
1642 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1643 in UTF-8 so that values greater than 255 can be handled.
1646 data TRUE if converting a data line; FALSE for a regex
1647 p points to a byte string
1648 utf true if UTF-8 (to be converted to UTF-16)
1649 len number of bytes in the string (excluding trailing zero)
1651 Returns: number of 16-bit data items used (excluding trailing zero)
1652 OR -1 if a UTF-8 string is malformed
1653 OR -2 if a value > 0x10ffff is encountered
1654 OR -3 if a value > 0xffff is encountered when not in UTF mode
1658 to16(int data
, pcre_uint8
*p
, int utf
, int len
)
1662 if (buffer16_size
< 2*len
+ 2)
1664 if (buffer16
!= NULL
) free(buffer16
);
1665 buffer16_size
= 2*len
+ 2;
1666 buffer16
= (pcre_uint16
*)malloc(buffer16_size
);
1667 if (buffer16
== NULL
)
1669 fprintf(stderr
, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size
);
1678 while (len
-- > 0) *pp
++ = *p
++;
1686 int chlen
= utf82ord(p
, &c
);
1687 if (chlen
<= 0) return -1;
1688 if (c
> 0x10ffff) return -2;
1691 if (c
< 0x10000) *pp
++ = c
; else
1693 if (!utf
) return -3;
1695 *pp
++ = 0xD800 | (c
>> 10);
1696 *pp
++ = 0xDC00 | (c
& 0x3ff);
1702 return pp
- buffer16
;
1706 #ifdef SUPPORT_PCRE32
1707 /*************************************************
1708 * Convert a string to 32-bit *
1709 *************************************************/
1711 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1712 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1713 times, because every character occupies at least one byte in UTF-8 and exactly
1714 4 bytes (one 32-bit unit) in UTF-32, however long its UTF-8 encoding was. The
1715 result is always left in buffer32.
1717 Note that this function does not object to surrogate values. This is
1718 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1719 for the purpose of testing that they are correctly faulted.
1721 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1722 in UTF-8 so that values greater than 255 can be handled.
1725 data TRUE if converting a data line; FALSE for a regex
1726 p points to a byte string
1727 utf true if UTF-8 (to be converted to UTF-32)
1728 len number of bytes in the string (excluding trailing zero)
1730 Returns: number of 32-bit data items used (excluding trailing zero)
1731 OR -1 if a UTF-8 string is malformed
1732 OR -2 if a value > 0x10ffff is encountered
1733 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1737 to32(int data
, pcre_uint8
*p
, int utf
, int len
)
1741 if (buffer32_size
< 4*len
+ 4)
1743 if (buffer32
!= NULL
) free(buffer32
);
1744 buffer32_size
= 4*len
+ 4;
1745 buffer32
= (pcre_uint32
*)malloc(buffer32_size
);
1746 if (buffer32
== NULL
)
1748 fprintf(stderr
, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size
);
1757 while (len
-- > 0) *pp
++ = *p
++;
1765 int chlen
= utf82ord(p
, &c
);
1766 if (chlen
<= 0) return -1;
1769 if (c
> 0x10ffff) return -2;
1770 if (!data
&& (c
& 0xfffff800u
) == 0xd800u
) return -3;
1780 return pp
- buffer32
;
1783 /* Check that a 32-bit character string is valid UTF-32.
1786 string points to the string
1787 length length of string, or -1 if the string is zero-terminated
1789 Returns: TRUE if the string is a valid UTF-32 string
1797 valid_utf32(pcre_uint32
*string
, int length
)
1799 register pcre_uint32
*p
;
1800 register pcre_uint32 c
;
1802 for (p
= string
; length
-- > 0; p
++)
1810 if ((c
& 0xfffff800u
) == 0xd800u
)
1814 if ((c
& 0xfffeu
) == 0xfffeu
|| (c
>= 0xfdd0u
&& c
<= 0xfdefu
))
1820 #endif /* SUPPORT_UTF */
1827 /*************************************************
1828 * Read or extend an input line *
1829 *************************************************/
1831 /* Input lines are read into buffer, but both patterns and data lines can be
1832 continued over multiple input lines. In addition, if the buffer fills up, we
1833 want to automatically expand it so as to be able to handle extremely large
1834 lines that are needed for certain stress tests. When the input buffer is
1835 expanded, the other two buffers must also be expanded likewise, and the
1836 contents of pbuffer, which are a copy of the input for callouts, must be
1837 preserved (for when expansion happens for a data line). This is not the most
1838 optimal way of handling this, but hey, this is just a test program!
1842 start where in buffer to start (this *must* be within buffer)
1843 prompt for stdin or readline()
1845 Returns: pointer to the start of new data
1846 could be a copy of start, or could be moved
1847 NULL if no data read and EOF reached
1851 extend_inputline(FILE *f
, pcre_uint8
*start
, const char *prompt
)
1853 pcre_uint8
*here
= start
;
1857 size_t rlen
= (size_t)(buffer_size
- (here
- buffer
));
1863 /* If libreadline or libedit support is required, use readline() to read a
1864 line if the input is a terminal. Note that readline() removes the trailing
1865 newline, so we must put it back again, to be compatible with fgets(). */
1867 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1868 if (isatty(fileno(f
)))
1871 char *s
= readline(prompt
);
1872 if (s
== NULL
) return (here
== start
)? NULL
: start
;
1874 if (len
> 0) add_history(s
);
1875 if (len
> rlen
- 1) len
= rlen
- 1;
1876 memcpy(here
, s
, len
);
1884 /* Read the next line by normal means, prompting if the file is stdin. */
1887 if (f
== stdin
) printf("%s", prompt
);
1888 if (fgets((char *)here
, rlen
, f
) == NULL
)
1889 return (here
== start
)? NULL
: start
;
1892 dlen
= (int)strlen((char *)here
);
1893 if (dlen
> 0 && here
[dlen
- 1] == '\n') return start
;
1899 int new_buffer_size
= 2*buffer_size
;
1900 pcre_uint8
*new_buffer
= (pcre_uint8
*)malloc(new_buffer_size
);
1901 pcre_uint8
*new_pbuffer
= (pcre_uint8
*)malloc(new_buffer_size
);
1903 if (new_buffer
== NULL
|| new_pbuffer
== NULL
)
1905 fprintf(stderr
, "pcretest: malloc(%d) failed\n", new_buffer_size
);
1909 memcpy(new_buffer
, buffer
, buffer_size
);
1910 memcpy(new_pbuffer
, pbuffer
, buffer_size
);
1912 buffer_size
= new_buffer_size
;
1914 start
= new_buffer
+ (start
- buffer
);
1915 here
= new_buffer
+ (here
- buffer
);
1920 buffer
= new_buffer
;
1921 pbuffer
= new_pbuffer
;
1925 return NULL
; /* Control never gets here */
1930 /*************************************************
1931 * Read number from string *
1932 *************************************************/
1934 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1935 around with conditional compilation, just do the job by hand. It is only used
1936 for unpicking arguments, so just keep it simple.
1939 str string to be converted
1940 endptr where to put the end pointer
1942 Returns: the unsigned long
1946 get_value(pcre_uint8
*str
, pcre_uint8
**endptr
)
1949 while(*str
!= 0 && isspace(*str
)) str
++;
1950 while (isdigit(*str
)) result
= result
* 10 + (int)(*str
++ - '0');
1957 /*************************************************
1958 * Print one character *
1959 *************************************************/
1961 /* Print a single character either literally, or as a hex escape. */
1963 static int pchar(pcre_uint32 c
, FILE *f
)
1968 if (f
!= NULL
) fprintf(f
, "%c", c
);
1976 if (f
!= NULL
) fprintf(f
, "\\x{%02x}", c
);
1981 if (f
!= NULL
) fprintf(f
, "\\x%02x", c
);
1986 if (f
!= NULL
) n
= fprintf(f
, "\\x{%02x}", c
);
1987 return n
>= 0 ? n
: 0;
1992 #ifdef SUPPORT_PCRE8
1993 /*************************************************
1994 * Print 8-bit character string *
1995 *************************************************/
1997 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1998 If handed a NULL file, just counts chars without printing. */
2000 static int pchars(pcre_uint8
*p
, int length
, FILE *f
)
2006 length
= strlen((char *)p
);
2008 while (length
-- > 0)
2013 int rc
= utf82ord(p
, &c
);
2014 if (rc
> 0 && rc
<= length
+ 1) /* Mustn't run over the end */
2018 yield
+= pchar(c
, f
);
2024 yield
+= pchar(c
, f
);
2033 #ifdef SUPPORT_PCRE16
2034 /*************************************************
2035 * Find length of 0-terminated 16-bit string *
2036 *************************************************/
2038 static int strlen16(PCRE_SPTR16 p
)
2041 while (*p
++ != 0) len
++;
2044 #endif /* SUPPORT_PCRE16 */
2048 #ifdef SUPPORT_PCRE32
2049 /*************************************************
2050 * Find length of 0-terminated 32-bit string *
2051 *************************************************/
2053 static int strlen32(PCRE_SPTR32 p
)
2056 while (*p
++ != 0) len
++;
2059 #endif /* SUPPORT_PCRE32 */
2063 #ifdef SUPPORT_PCRE16
2064 /*************************************************
2065 * Print 16-bit character string *
2066 *************************************************/
2068 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2069 If handed a NULL file, just counts chars without printing. */
2071 static int pchars16(PCRE_SPTR16 p
, int length
, FILE *f
)
2076 length
= strlen16(p
);
2078 while (length
-- > 0)
2080 pcre_uint32 c
= *p
++ & 0xffff;
2082 if (use_utf
&& c
>= 0xD800 && c
< 0xDC00 && length
> 0)
2084 int d
= *p
& 0xffff;
2085 if (d
>= 0xDC00 && d
< 0xDFFF)
2087 c
= ((c
& 0x3ff) << 10) + (d
& 0x3ff) + 0x10000;
2093 yield
+= pchar(c
, f
);
2098 #endif /* SUPPORT_PCRE16 */
2102 #ifdef SUPPORT_PCRE32
2103 /*************************************************
2104 * Print 32-bit character string *
2105 *************************************************/
2107 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2108 If handed a NULL file, just counts chars without printing. */
2110 static int pchars32(PCRE_SPTR32 p
, int length
, BOOL utf
, FILE *f
)
2114 (void)(utf
); /* Avoid compiler warning */
2117 length
= strlen32(p
);
2119 while (length
-- > 0)
2121 pcre_uint32 c
= *p
++;
2122 yield
+= pchar(c
, f
);
2127 #endif /* SUPPORT_PCRE32 */
2131 #ifdef SUPPORT_PCRE8
2132 /*************************************************
2133 * Read a capture name (8-bit) and check it *
2134 *************************************************/
2137 read_capture_name8(pcre_uint8
*p
, pcre_uint8
**pp
, pcre
*re
)
2139 pcre_uint8
*npp
= *pp
;
2140 while (isalnum(*p
)) *npp
++ = *p
++;
2143 if (pcre_get_stringnumber(re
, (char *)(*pp
)) < 0)
2145 fprintf(outfile
, "no parentheses with name \"");
2146 PCHARSV(*pp
, 0, -1, outfile
);
2147 fprintf(outfile
, "\"\n");
2153 #endif /* SUPPORT_PCRE8 */
2157 #ifdef SUPPORT_PCRE16
2158 /*************************************************
2159 * Read a capture name (16-bit) and check it *
2160 *************************************************/
2162 /* Note that the text being read is 8-bit. */
2165 read_capture_name16(pcre_uint8
*p
, pcre_uint16
**pp
, pcre
*re
)
2167 pcre_uint16
*npp
= *pp
;
2168 while (isalnum(*p
)) *npp
++ = *p
++;
2171 if (pcre16_get_stringnumber((pcre16
*)re
, (PCRE_SPTR16
)(*pp
)) < 0)
2173 fprintf(outfile
, "no parentheses with name \"");
2174 PCHARSV(*pp
, 0, -1, outfile
);
2175 fprintf(outfile
, "\"\n");
2180 #endif /* SUPPORT_PCRE16 */
2184 #ifdef SUPPORT_PCRE32
2185 /*************************************************
2186 * Read a capture name (32-bit) and check it *
2187 *************************************************/
2189 /* Note that the text being read is 8-bit. */
2192 read_capture_name32(pcre_uint8
*p
, pcre_uint32
**pp
, pcre
*re
)
2194 pcre_uint32
*npp
= *pp
;
2195 while (isalnum(*p
)) *npp
++ = *p
++;
2198 if (pcre32_get_stringnumber((pcre32
*)re
, (PCRE_SPTR32
)(*pp
)) < 0)
2200 fprintf(outfile
, "no parentheses with name \"");
2201 PCHARSV(*pp
, 0, -1, outfile
);
2202 fprintf(outfile
, "\"\n");
2207 #endif /* SUPPORT_PCRE32 */
2211 /*************************************************
2212 * Callout function *
2213 *************************************************/
2215 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2216 the match. Yield zero unless more callouts than the fail count, or the callout
2217 data is not zero. */
2219 static int callout(pcre_callout_block
*cb
)
2221 FILE *f
= (first_callout
| callout_extra
)? outfile
: NULL
;
2222 int i
, pre_start
, post_start
, subject_length
;
2226 fprintf(f
, "Callout %d: last capture = %d\n",
2227 cb
->callout_number
, cb
->capture_last
);
2229 for (i
= 0; i
< cb
->capture_top
* 2; i
+= 2)
2231 if (cb
->offset_vector
[i
] < 0)
2232 fprintf(f
, "%2d: <unset>\n", i
/2);
2235 fprintf(f
, "%2d: ", i
/2);
2236 PCHARSV(cb
->subject
, cb
->offset_vector
[i
],
2237 cb
->offset_vector
[i
+1] - cb
->offset_vector
[i
], f
);
2243 /* Re-print the subject in canonical form, the first time or if giving full
2244 details. On subsequent calls in the same match, we use pchars just to find the
2245 printed lengths of the substrings. */
2247 if (f
!= NULL
) fprintf(f
, "--->");
2249 PCHARS(pre_start
, cb
->subject
, 0, cb
->start_match
, f
);
2250 PCHARS(post_start
, cb
->subject
, cb
->start_match
,
2251 cb
->current_position
- cb
->start_match
, f
);
2253 PCHARS(subject_length
, cb
->subject
, 0, cb
->subject_length
, NULL
);
2255 PCHARSV(cb
->subject
, cb
->current_position
,
2256 cb
->subject_length
- cb
->current_position
, f
);
2258 if (f
!= NULL
) fprintf(f
, "\n");
2260 /* Always print appropriate indicators, with callout number if not already
2261 shown. For automatic callouts, show the pattern offset. */
2263 if (cb
->callout_number
== 255)
2265 fprintf(outfile
, "%+3d ", cb
->pattern_position
);
2266 if (cb
->pattern_position
> 99) fprintf(outfile
, "\n ");
2270 if (callout_extra
) fprintf(outfile
, " ");
2271 else fprintf(outfile
, "%3d ", cb
->callout_number
);
2274 for (i
= 0; i
< pre_start
; i
++) fprintf(outfile
, " ");
2275 fprintf(outfile
, "^");
2279 for (i
= 0; i
< post_start
- 1; i
++) fprintf(outfile
, " ");
2280 fprintf(outfile
, "^");
2283 for (i
= 0; i
< subject_length
- pre_start
- post_start
+ 4; i
++)
2284 fprintf(outfile
, " ");
2286 fprintf(outfile
, "%.*s", (cb
->next_item_length
== 0)? 1 : cb
->next_item_length
,
2287 pbuffer
+ cb
->pattern_position
);
2289 fprintf(outfile
, "\n");
2292 if (cb
->mark
!= last_callout_mark
)
2294 if (cb
->mark
== NULL
)
2295 fprintf(outfile
, "Latest Mark: <unset>\n");
2298 fprintf(outfile
, "Latest Mark: ");
2299 PCHARSV(cb
->mark
, 0, -1, outfile
);
2300 putc('\n', outfile
);
2302 last_callout_mark
= cb
->mark
;
2305 if (cb
->callout_data
!= NULL
)
2307 int callout_data
= *((int *)(cb
->callout_data
));
2308 if (callout_data
!= 0)
2310 fprintf(outfile
, "Callout data = %d\n", callout_data
);
2311 return callout_data
;
2315 return (cb
->callout_number
!= callout_fail_id
)? 0 :
2316 (++callout_count
>= callout_fail_count
)? 1 : 0;
2320 /*************************************************
2321 * Local malloc functions *
2322 *************************************************/
2324 /* Alternative malloc function, to test functionality and save the size of a
2325 compiled re, which is the first store request that pcre_compile() makes. The
2326 show_malloc variable is set only during matching. */
2328 static void *new_malloc(size_t size
)
2330 void *block
= malloc(size
);
2331 gotten_store
= size
;
2332 if (first_gotten_store
== 0) first_gotten_store
= size
;
2334 fprintf(outfile
, "malloc %3d %p\n", (int)size
, block
);
2338 static void new_free(void *block
)
2341 fprintf(outfile
, "free %p\n", block
);
2345 /* For recursion malloc/free, to test stacking calls */
2347 static void *stack_malloc(size_t size
)
2349 void *block
= malloc(size
);
2351 fprintf(outfile
, "stack_malloc %3d %p\n", (int)size
, block
);
2355 static void stack_free(void *block
)
2358 fprintf(outfile
, "stack_free %p\n", block
);
2363 /*************************************************
2364 * Call pcre_fullinfo() *
2365 *************************************************/
2367 /* Get one piece of information from the pcre_fullinfo() function. When only
2368 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2369 value, but the code is defensive.
2374 option PCRE_INFO_xxx option
2375 ptr where to put the data
2377 Returns: 0 when OK, < 0 on error
2381 new_info(pcre
*re
, pcre_extra
*study
, int option
, void *ptr
)
2385 if (pcre_mode
== PCRE32_MODE
)
2386 #ifdef SUPPORT_PCRE32
2387 rc
= pcre32_fullinfo((pcre32
*)re
, (pcre32_extra
*)study
, option
, ptr
);
2389 rc
= PCRE_ERROR_BADMODE
;
2391 else if (pcre_mode
== PCRE16_MODE
)
2392 #ifdef SUPPORT_PCRE16
2393 rc
= pcre16_fullinfo((pcre16
*)re
, (pcre16_extra
*)study
, option
, ptr
);
2395 rc
= PCRE_ERROR_BADMODE
;
2398 #ifdef SUPPORT_PCRE8
2399 rc
= pcre_fullinfo(re
, study
, option
, ptr
);
2401 rc
= PCRE_ERROR_BADMODE
;
2406 fprintf(outfile
, "Error %d from pcre%s_fullinfo(%d)\n", rc
,
2407 pcre_mode
== PCRE32_MODE
? "32" : pcre_mode
== PCRE16_MODE
? "16" : "", option
);
2408 if (rc
== PCRE_ERROR_BADMODE
)
2409 fprintf(outfile
, "Running in %d-bit mode but pattern was compiled in "
2410 "%d-bit mode\n", 8 * CHAR_SIZE
,
2411 8 * (REAL_PCRE_FLAGS(re
) & PCRE_MODE_MASK
));
2419 /*************************************************
2420 * Swap byte functions *
2421 *************************************************/
2423 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2424 value, respectively.
2429 Returns: the byte swapped value
2433 swap_uint32(pcre_uint32 value
)
2435 return ((value
& 0x000000ff) << 24) |
2436 ((value
& 0x0000ff00) << 8) |
2437 ((value
& 0x00ff0000) >> 8) |
2442 swap_uint16(pcre_uint16 value
)
2444 return (value
>> 8) | (value
<< 8);
2449 /*************************************************
2450 * Flip bytes in a compiled pattern *
2451 *************************************************/
2453 /* This function is called if the 'F' option was present on a pattern that is
2454 to be written to a file. We flip the bytes of all the integer fields in the
2455 regex data block and the study block. In 16-bit mode this also flips relevant
2456 bytes in the pattern itself. This is to make it possible to test PCRE's
2457 ability to reload byte-flipped patterns, e.g. those compiled on a different
2460 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2462 regexflip8_or_16(pcre
*ere
, pcre_extra
*extra
)
2464 real_pcre8_or_16
*re
= (real_pcre8_or_16
*)ere
;
2465 #ifdef SUPPORT_PCRE16
2467 pcre_uint16
*ptr
= (pcre_uint16
*)re
+ re
->name_table_offset
;
2468 int length
= re
->name_count
* re
->name_entry_size
;
2470 BOOL utf
= (re
->options
& PCRE_UTF16
) != 0;
2471 BOOL utf16_char
= FALSE
;
2472 #endif /* SUPPORT_UTF */
2473 #endif /* SUPPORT_PCRE16 */
2475 /* Always flip the bytes in the main data block and study blocks. */
2477 re
->magic_number
= REVERSED_MAGIC_NUMBER
;
2478 re
->size
= swap_uint32(re
->size
);
2479 re
->options
= swap_uint32(re
->options
);
2480 re
->flags
= swap_uint16(re
->flags
);
2481 re
->top_bracket
= swap_uint16(re
->top_bracket
);
2482 re
->top_backref
= swap_uint16(re
->top_backref
);
2483 re
->first_char
= swap_uint16(re
->first_char
);
2484 re
->req_char
= swap_uint16(re
->req_char
);
2485 re
->name_table_offset
= swap_uint16(re
->name_table_offset
);
2486 re
->name_entry_size
= swap_uint16(re
->name_entry_size
);
2487 re
->name_count
= swap_uint16(re
->name_count
);
2491 pcre_study_data
*rsd
= (pcre_study_data
*)(extra
->study_data
);
2492 rsd
->size
= swap_uint32(rsd
->size
);
2493 rsd
->flags
= swap_uint32(rsd
->flags
);
2494 rsd
->minlength
= swap_uint32(rsd
->minlength
);
2497 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2498 in the name table, if present, and then in the pattern itself. */
2500 #ifdef SUPPORT_PCRE16
2501 if (pcre_mode
!= PCRE16_MODE
) return;
2505 /* Swap previous characters. */
2506 while (length
-- > 0)
2508 *ptr
= swap_uint16(*ptr
);
2514 if ((ptr
[-1] & 0xfc00) == 0xd800)
2516 /* We know that there is only one extra character in UTF-16. */
2517 *ptr
= swap_uint16(*ptr
);
2522 #endif /* SUPPORT_UTF */
2524 /* Get next opcode. */
2528 *ptr
++ = swap_uint16(op
);
2571 case OP_NOTMINQUERY
:
2577 case OP_NOTPOSQUERY
:
2580 case OP_NOTMINSTARI
:
2582 case OP_NOTMINPLUSI
:
2584 case OP_NOTMINQUERYI
:
2586 case OP_NOTMINUPTOI
:
2588 case OP_NOTPOSSTARI
:
2589 case OP_NOTPOSPLUSI
:
2590 case OP_NOTPOSQUERYI
:
2591 case OP_NOTPOSUPTOI
:
2592 if (utf
) utf16_char
= TRUE
;
2597 length
= OP_lengths16
[op
] - 1;
2602 /* Skip the character bit map. */
2603 ptr
+= 32/sizeof(pcre_uint16
);
2608 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2610 length
= (int)((((unsigned int)(ptr
[0]) << 16) | (unsigned int)(ptr
[1]))
2611 - (1 + LINK_SIZE
+ 1));
2613 length
= (int)((unsigned int)(ptr
[0]) - (1 + LINK_SIZE
+ 1));
2615 /* Reverse the size of the XCLASS instance. */
2616 *ptr
= swap_uint16(*ptr
);
2620 *ptr
= swap_uint16(*ptr
);
2625 *ptr
= swap_uint16(op
);
2627 if ((op
& XCL_MAP
) != 0)
2629 /* Skip the character bit map. */
2630 ptr
+= 32/sizeof(pcre_uint16
);
2631 length
-= 32/sizeof(pcre_uint16
);
2636 /* Control should never reach here in 16 bit mode. */
2637 #endif /* SUPPORT_PCRE16 */
2639 #endif /* SUPPORT_PCRE[8|16] */
2643 #if defined SUPPORT_PCRE32
2645 regexflip_32(pcre
*ere
, pcre_extra
*extra
)
2647 real_pcre32
*re
= (real_pcre32
*)ere
;
2649 pcre_uint32
*ptr
= (pcre_uint32
*)re
+ re
->name_table_offset
;
2650 int length
= re
->name_count
* re
->name_entry_size
;
2652 /* Always flip the bytes in the main data block and study blocks. */
2654 re
->magic_number
= REVERSED_MAGIC_NUMBER
;
2655 re
->size
= swap_uint32(re
->size
);
2656 re
->options
= swap_uint32(re
->options
);
2657 re
->flags
= swap_uint16(re
->flags
);
2658 re
->top_bracket
= swap_uint16(re
->top_bracket
);
2659 re
->top_backref
= swap_uint16(re
->top_backref
);
2660 re
->first_char
= swap_uint32(re
->first_char
);
2661 re
->req_char
= swap_uint32(re
->req_char
);
2662 re
->name_table_offset
= swap_uint16(re
->name_table_offset
);
2663 re
->name_entry_size
= swap_uint16(re
->name_entry_size
);
2664 re
->name_count
= swap_uint16(re
->name_count
);
2668 pcre_study_data
*rsd
= (pcre_study_data
*)(extra
->study_data
);
2669 rsd
->size
= swap_uint32(rsd
->size
);
2670 rsd
->flags
= swap_uint32(rsd
->flags
);
2671 rsd
->minlength
= swap_uint32(rsd
->minlength
);
2674 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2675 the pattern itself. */
2679 /* Swap previous characters. */
2680 while (length
-- > 0)
2682 *ptr
= swap_uint32(*ptr
);
2686 /* Get next opcode. */
2690 *ptr
++ = swap_uint32(op
);
2698 length
= OP_lengths32
[op
] - 1;
2703 /* Skip the character bit map. */
2704 ptr
+= 32/sizeof(pcre_uint32
);
2709 /* LINK_SIZE can only be 1 in 32-bit mode. */
2710 length
= (int)((unsigned int)(ptr
[0]) - (1 + LINK_SIZE
+ 1));
2712 /* Reverse the size of the XCLASS instance. */
2713 *ptr
= swap_uint32(*ptr
);
2717 *ptr
= swap_uint32(op
);
2719 if ((op
& XCL_MAP
) != 0)
2721 /* Skip the character bit map. */
2722 ptr
+= 32/sizeof(pcre_uint32
);
2723 length
-= 32/sizeof(pcre_uint32
);
2728 /* Control should never reach here in 32 bit mode. */
2731 #endif /* SUPPORT_PCRE32 */
2736 regexflip(pcre
*ere
, pcre_extra
*extra
)
2738 #if defined SUPPORT_PCRE32
2739 if (REAL_PCRE_FLAGS(ere
) & PCRE_MODE32
)
2740 regexflip_32(ere
, extra
);
2742 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2743 if (REAL_PCRE_FLAGS(ere
) & (PCRE_MODE8
| PCRE_MODE16
))
2744 regexflip8_or_16(ere
, extra
);
2750 /*************************************************
2751 * Check match or recursion limit *
2752 *************************************************/
2755 check_match_limit(pcre
*re
, pcre_extra
*extra
, pcre_uint8
*bptr
, int len
,
2756 int start_offset
, int options
, int *use_offsets
, int use_size_offsets
,
2757 int flag
, unsigned long int *limit
, int errnumber
, const char *msg
)
2764 extra
->flags
|= flag
;
2770 PCRE_EXEC(count
, re
, extra
, bptr
, len
, start_offset
, options
,
2771 use_offsets
, use_size_offsets
);
2773 if (count
== errnumber
)
2775 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2777 mid
= (mid
== max
- 1)? max
: (max
> 0)? (min
+ max
)/2 : mid
*2;
2780 else if (count
>= 0 || count
== PCRE_ERROR_NOMATCH
||
2781 count
== PCRE_ERROR_PARTIAL
)
2785 fprintf(outfile
, "Minimum %s limit = %d\n", msg
, mid
);
2788 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2790 mid
= (min
+ mid
)/2;
2792 else break; /* Some other error */
2795 extra
->flags
&= ~flag
;
2801 /*************************************************
2802 * Case-independent strncmp() function *
2803 *************************************************/
2809 n number of characters to compare
2811 Returns: < 0, = 0, or > 0, according to the comparison
2815 strncmpic(pcre_uint8
*s
, pcre_uint8
*t
, int n
)
2819 int c
= tolower(*s
++) - tolower(*t
++);
2827 /*************************************************
2828 * Check newline indicator *
2829 *************************************************/
2831 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2832 a message and return 0 if there is no match.
2835 p points after the leading '<'
2836 f file for error message
2838 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2842 check_newline(pcre_uint8
*p
, FILE *f
)
2844 if (strncmpic(p
, (pcre_uint8
*)"cr>", 3) == 0) return PCRE_NEWLINE_CR
;
2845 if (strncmpic(p
, (pcre_uint8
*)"lf>", 3) == 0) return PCRE_NEWLINE_LF
;
2846 if (strncmpic(p
, (pcre_uint8
*)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF
;
2847 if (strncmpic(p
, (pcre_uint8
*)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF
;
2848 if (strncmpic(p
, (pcre_uint8
*)"any>", 4) == 0) return PCRE_NEWLINE_ANY
;
2849 if (strncmpic(p
, (pcre_uint8
*)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF
;
2850 if (strncmpic(p
, (pcre_uint8
*)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE
;
2851 fprintf(f
, "Unknown newline type at: <%s\n", p
);
2857 /*************************************************
2859 *************************************************/
2864 printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
2865 printf("Input and output default to stdin and stdout.\n");
2866 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2867 printf("If input is a terminal, readline() is used to read from it.\n");
2869 printf("This version of pcretest is not linked with readline().\n");
2871 printf("\nOptions:\n");
2872 #ifdef SUPPORT_PCRE16
2873 printf(" -16 use the 16-bit library\n");
2875 #ifdef SUPPORT_PCRE32
2876 printf(" -32 use the 32-bit library\n");
2878 printf(" -b show compiled code\n");
2879 printf(" -C show PCRE compile-time options and exit\n");
2880 printf(" -C arg show a specific compile-time option\n");
2881 printf(" and exit with its value. The arg can be:\n");
2882 printf(" linksize internal link size [2, 3, 4]\n");
2883 printf(" pcre8 8 bit library support enabled [0, 1]\n");
2884 printf(" pcre16 16 bit library support enabled [0, 1]\n");
2885 printf(" pcre32 32 bit library support enabled [0, 1]\n");
2886 printf(" utf Unicode Transformation Format supported [0, 1]\n");
2887 printf(" ucp Unicode Properties supported [0, 1]\n");
2888 printf(" jit Just-in-time compiler supported [0, 1]\n");
2889 printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2890 printf(" -d debug: show compiled code and information (-b and -i)\n");
2892 printf(" -dfa force DFA matching for all subjects\n");
2894 printf(" -help show usage information\n");
2895 printf(" -i show information about compiled patterns\n"
2896 " -M find MATCH_LIMIT minimum for each subject\n"
2897 " -m output memory used information\n"
2898 " -o <n> set size of offsets vector to <n>\n");
2899 #if !defined NOPOSIX
2900 printf(" -p use POSIX interface\n");
2902 printf(" -q quiet: do not output PCRE version number at start\n");
2903 printf(" -S <n> set stack size to <n> megabytes\n");
2904 printf(" -s force each pattern to be studied at basic level\n"
2905 " -s+ force each pattern to be studied, using JIT if available\n"
2906 " -s++ ditto, verifying when JIT was actually used\n"
2907 " -s+n force each pattern to be studied, using JIT if available,\n"
2908 " where 1 <= n <= 7 selects JIT options\n"
2909 " -s++n ditto, verifying when JIT was actually used\n"
2910 " -t time compilation and execution\n");
2911 printf(" -t <n> time compilation and execution, repeating <n> times\n");
2912 printf(" -tm time execution (matching) only\n");
2913 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
2918 /*************************************************
2920 *************************************************/
2922 /* Read lines from named file or stdin and write to named file or stdout; lines
2923 consist of a regular expression, in delimiters and optionally followed by
2924 options, followed by a set of test data, terminated by an empty line. */
2926 int main(int argc
, char **argv
)
2928 FILE *infile
= stdin
;
2929 const char *version
;
2931 int study_options
= 0;
2932 int default_find_match_limit
= FALSE
;
2938 int force_study
= -1;
2939 int force_study_options
= 0;
2941 int size_offsets
= 45;
2942 int size_offsets_max
;
2943 int *offsets
= NULL
;
2946 int all_use_dfa
= 0;
2950 pcre_uint8
*dbuffer
= NULL
;
2951 size_t dbuffer_size
= 1u << 14;
2953 #if !defined NOPOSIX
2957 int *dfa_workspace
= NULL
;
2960 pcre_jit_stack
*jit_stack
= NULL
;
2962 /* These vectors store, end-to-end, a list of zero-terminated captured
2963 substring names, each list itself being terminated by an empty name. Assume
2964 that 1024 is plenty long enough for the few names we'll be testing. It is
2965 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2966 for the actual memory, to ensure alignment. */
2968 pcre_uint32 copynames
[1024];
2969 pcre_uint32 getnames
[1024];
2971 #ifdef SUPPORT_PCRE32
2972 pcre_uint32
*cn32ptr
;
2973 pcre_uint32
*gn32ptr
;
2976 #ifdef SUPPORT_PCRE16
2977 pcre_uint16
*copynames16
= (pcre_uint16
*)copynames
;
2978 pcre_uint16
*getnames16
= (pcre_uint16
*)getnames
;
2979 pcre_uint16
*cn16ptr
;
2980 pcre_uint16
*gn16ptr
;
2983 #ifdef SUPPORT_PCRE8
2984 pcre_uint8
*copynames8
= (pcre_uint8
*)copynames
;
2985 pcre_uint8
*getnames8
= (pcre_uint8
*)getnames
;
2990 /* Get buffers from malloc() so that valgrind will check their misuse when
2991 debugging. They grow automatically when very long lines are read. The 16-
2992 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2994 buffer
= (pcre_uint8
*)malloc(buffer_size
);
2995 pbuffer
= (pcre_uint8
*)malloc(buffer_size
);
2997 /* The outfile variable is static so that new_malloc can use it. */
3001 /* The following _setmode() stuff is some Windows magic that tells its runtime
3002 library to translate CRLF into a single LF character. At least, that's what
3003 I've been told: never having used Windows I take this all on trust. Originally
3004 it set 0x8000, but then I was advised that _O_BINARY was better. */
3006 #if defined(_WIN32) || defined(WIN32)
3007 _setmode( _fileno( stdout
), _O_BINARY
);
3010 /* Get the version number: pcre_version(), pcre16_version() and pcre32_version()
3011 all give the same answer. We just need to ensure that we call one that is available. */
3013 #if defined SUPPORT_PCRE8
3014 version
= pcre_version();
3015 #elif defined SUPPORT_PCRE16
3016 version
= pcre16_version();
3017 #elif defined SUPPORT_PCRE32
3018 version
= pcre32_version();
3023 while (argc
> 1 && argv
[op
][0] == '-')
3026 char *arg
= argv
[op
];
3028 if (strcmp(arg
, "-m") == 0) showstore
= 1;
3029 else if (strcmp(arg
, "-s") == 0) force_study
= 0;
3031 else if (strncmp(arg
, "-s+", 3) == 0)
3034 if (*arg
== '+') { arg
++; verify_jit
= TRUE
; }
3037 force_study_options
= jit_study_bits
[6];
3038 else if (*arg
>= '1' && *arg
<= '7')
3039 force_study_options
= jit_study_bits
[*arg
- '1'];
3042 else if (strcmp(arg
, "-8") == 0)
3044 #ifdef SUPPORT_PCRE8
3045 pcre_mode
= PCRE8_MODE
;
3047 printf("** This version of PCRE was built without 8-bit support\n");
3051 else if (strcmp(arg
, "-16") == 0)
3053 #ifdef SUPPORT_PCRE16
3054 pcre_mode
= PCRE16_MODE
;
3056 printf("** This version of PCRE was built without 16-bit support\n");
3060 else if (strcmp(arg
, "-32") == 0)
3062 #ifdef SUPPORT_PCRE32
3063 pcre_mode
= PCRE32_MODE
;
3065 printf("** This version of PCRE was built without 32-bit support\n");
3069 else if (strcmp(arg
, "-q") == 0) quiet
= 1;
3070 else if (strcmp(arg
, "-b") == 0) debug
= 1;
3071 else if (strcmp(arg
, "-i") == 0) showinfo
= 1;
3072 else if (strcmp(arg
, "-d") == 0) showinfo
= debug
= 1;
3073 else if (strcmp(arg
, "-M") == 0) default_find_match_limit
= TRUE
;
3075 else if (strcmp(arg
, "-dfa") == 0) all_use_dfa
= 1;
3077 else if (strcmp(arg
, "-o") == 0 && argc
> 2 &&
3078 ((size_offsets
= get_value((pcre_uint8
*)argv
[op
+1], &endptr
)),
3084 else if (strcmp(arg
, "-t") == 0 || strcmp(arg
, "-tm") == 0)
3086 int both
= arg
[2] == 0;
3088 if (argc
> 2 && (temp
= get_value((pcre_uint8
*)argv
[op
+1], &endptr
),
3095 else timeitm
= LOOPREPEAT
;
3096 if (both
) timeit
= timeitm
;
3098 else if (strcmp(arg
, "-S") == 0 && argc
> 2 &&
3099 ((stack_size
= get_value((pcre_uint8
*)argv
[op
+1], &endptr
)),
3102 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3103 printf("PCRE: -S not supported on this OS\n");
3108 getrlimit(RLIMIT_STACK
, &rlim
);
3109 rlim
.rlim_cur
= stack_size
* 1024 * 1024;
3110 rc
= setrlimit(RLIMIT_STACK
, &rlim
);
3113 printf("PCRE: setrlimit() failed with error %d\n", rc
);
3120 #if !defined NOPOSIX
3121 else if (strcmp(arg
, "-p") == 0) posix
= 1;
3123 else if (strcmp(arg
, "-C") == 0)
3126 unsigned long int lrc
;
3130 if (strcmp(argv
[op
+ 1], "linksize") == 0)
3132 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE
, &rc
);
3136 else if (strcmp(argv
[op
+ 1], "pcre8") == 0)
3138 #ifdef SUPPORT_PCRE8
3147 else if (strcmp(argv
[op
+ 1], "pcre16") == 0)
3149 #ifdef SUPPORT_PCRE16
3158 else if (strcmp(argv
[op
+ 1], "pcre32") == 0)
3160 #ifdef SUPPORT_PCRE32
3169 if (strcmp(argv
[op
+ 1], "utf") == 0)
3171 #ifdef SUPPORT_PCRE8
3172 if (pcre_mode
== PCRE8_MODE
)
3173 (void)pcre_config(PCRE_CONFIG_UTF8
, &rc
);
3175 #ifdef SUPPORT_PCRE16
3176 if (pcre_mode
== PCRE16_MODE
)
3177 (void)pcre16_config(PCRE_CONFIG_UTF16
, &rc
);
3179 #ifdef SUPPORT_PCRE32
3180 if (pcre_mode
== PCRE32_MODE
)
3181 (void)pcre32_config(PCRE_CONFIG_UTF32
, &rc
);
3187 else if (strcmp(argv
[op
+ 1], "ucp") == 0)
3189 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES
, &rc
);
3193 else if (strcmp(argv
[op
+ 1], "jit") == 0)
3195 (void)PCRE_CONFIG(PCRE_CONFIG_JIT
, &rc
);
3199 else if (strcmp(argv
[op
+ 1], "newline") == 0)
3201 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE
, &rc
);
3202 print_newline_config(rc
, TRUE
);
3204 else if (strcmp(argv
[op
+ 1], "ebcdic") == 0)
3213 else if (strcmp(argv
[op
+ 1], "ebcdic-nl") == 0)
3216 printf("0x%02x\n", CHAR_LF
);
3223 printf("Unknown -C option: %s\n", argv
[op
+ 1]);
3228 /* No argument for -C: output all configuration information. */
3230 printf("PCRE version %s\n", version
);
3231 printf("Compiled with\n");
3234 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF
);
3237 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3238 are set, either both UTFs are supported or both are not supported. */
3240 #ifdef SUPPORT_PCRE8
3241 printf(" 8-bit support\n");
3242 (void)pcre_config(PCRE_CONFIG_UTF8
, &rc
);
3243 printf (" %sUTF-8 support\n", rc
? "" : "No ");
3245 #ifdef SUPPORT_PCRE16
3246 printf(" 16-bit support\n");
3247 (void)pcre16_config(PCRE_CONFIG_UTF16
, &rc
);
3248 printf (" %sUTF-16 support\n", rc
? "" : "No ");
3250 #ifdef SUPPORT_PCRE32
3251 printf(" 32-bit support\n");
3252 (void)pcre32_config(PCRE_CONFIG_UTF32
, &rc
);
3253 printf (" %sUTF-32 support\n", rc
? "" : "No ");
3256 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES
, &rc
);
3257 printf(" %sUnicode properties support\n", rc
? "" : "No ");
3258 (void)PCRE_CONFIG(PCRE_CONFIG_JIT
, &rc
);
3262 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET
, (void *)(&arch
));
3263 printf(" Just-in-time compiler support: %s\n", arch
);
3266 printf(" No just-in-time compiler support\n");
3267 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE
, &rc
);
3268 print_newline_config(rc
, FALSE
);
3269 (void)PCRE_CONFIG(PCRE_CONFIG_BSR
, &rc
);
3270 printf(" \\R matches %s\n", rc
? "CR, LF, or CRLF only" :
3271 "all Unicode newlines");
3272 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE
, &rc
);
3273 printf(" Internal link size = %d\n", rc
);
3274 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
, &rc
);
3275 printf(" POSIX malloc threshold = %d\n", rc
);
3276 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT
, &lrc
);
3277 printf(" Default match limit = %ld\n", lrc
);
3278 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION
, &lrc
);
3279 printf(" Default recursion depth limit = %ld\n", lrc
);
3280 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE
, &rc
);
3281 printf(" Match recursion uses %s", rc
? "stack" : "heap");
3284 PCRE_EXEC(stack_size
, NULL
, NULL
, NULL
, -999, -999, 0, NULL
, 0);
3285 printf(": %sframe size = %d bytes", rc
? "approximate " : "", -stack_size
);
3290 else if (strcmp(arg
, "-help") == 0 ||
3291 strcmp(arg
, "--help") == 0)
3299 printf("** Unknown or malformed option %s\n", arg
);
3308 /* Get the store for the offsets vector, and remember what it was */
3310 size_offsets_max
= size_offsets
;
3311 offsets
= (int *)malloc(size_offsets_max
* sizeof(int));
3312 if (offsets
== NULL
)
3314 printf("** Failed to get %d bytes of memory for offsets vector\n",
3315 (int)(size_offsets_max
* sizeof(int)));
3320 /* Sort out the input and output files */
3324 infile
= fopen(argv
[op
], INPUT_MODE
);
3327 printf("** Failed to open %s\n", argv
[op
]);
3335 outfile
= fopen(argv
[op
+1], OUTPUT_MODE
);
3336 if (outfile
== NULL
)
3338 printf("** Failed to open %s\n", argv
[op
+1]);
3344 /* Set alternative malloc function */
3346 #ifdef SUPPORT_PCRE8
3347 pcre_malloc
= new_malloc
;
3348 pcre_free
= new_free
;
3349 pcre_stack_malloc
= stack_malloc
;
3350 pcre_stack_free
= stack_free
;
3353 #ifdef SUPPORT_PCRE16
3354 pcre16_malloc
= new_malloc
;
3355 pcre16_free
= new_free
;
3356 pcre16_stack_malloc
= stack_malloc
;
3357 pcre16_stack_free
= stack_free
;
3360 #ifdef SUPPORT_PCRE32
3361 pcre32_malloc
= new_malloc
;
3362 pcre32_free
= new_free
;
3363 pcre32_stack_malloc
= stack_malloc
;
3364 pcre32_stack_free
= stack_free
;
3367 /* Heading line unless quiet, then prompt for first regex if stdin */
3369 if (!quiet
) fprintf(outfile
, "PCRE version %s\n\n", version
);
3376 pcre_extra
*extra
= NULL
;
3378 #if !defined NOPOSIX /* There are still compilers that require no indent */
3384 pcre_uint8
*markptr
;
3385 pcre_uint8
*p
, *pp
, *ppp
;
3386 pcre_uint8
*to_file
= NULL
;
3387 const pcre_uint8
*tables
= NULL
;
3388 unsigned long int get_options
;
3389 unsigned long int true_size
, true_study_size
= 0;
3390 size_t size
, regex_gotten_store
;
3394 int no_force_study
= 0;
3395 int do_debug
= debug
;
3398 int do_showinfo
= showinfo
;
3399 int do_showrest
= 0;
3400 int do_showcaprest
= 0;
3402 int erroroffset
, len
, delimiter
, poffset
;
3405 int dfa_matched
= 0;
3411 if (extend_inputline(infile
, buffer
, " re> ") == NULL
) break;
3412 if (infile
!= stdin
) fprintf(outfile
, "%s", (char *)buffer
);
3416 while (isspace(*p
)) p
++;
3417 if (*p
== 0) continue;
3419 /* See if the pattern is to be loaded pre-compiled from a file. */
3421 if (*p
== '<' && strchr((char *)(p
+1), '<') == NULL
)
3435 pp
= p
+ (int)strlen((char *)p
);
3436 while (isspace(pp
[-1])) pp
--;
3439 f
= fopen((char *)p
, "rb");
3442 fprintf(outfile
, "Failed to open %s: %s\n", p
, strerror(errno
));
3446 first_gotten_store
= 0;
3447 if (fread(sbuf
, 1, 8, f
) != 8) goto FAIL_READ
;
3450 (sbuf
[0] << 24) | (sbuf
[1] << 16) | (sbuf
[2] << 8) | sbuf
[3];
3452 (sbuf
[4] << 24) | (sbuf
[5] << 16) | (sbuf
[6] << 8) | sbuf
[7];
3454 re
= (pcre
*)new_malloc(true_size
);
3457 printf("** Failed to get %d bytes of memory for pcre object\n",
3462 regex_gotten_store
= first_gotten_store
;
3464 if (fread(re
, 1, true_size
, f
) != true_size
) goto FAIL_READ
;
3466 magic
= REAL_PCRE_MAGIC(re
);
3467 if (magic
!= MAGIC_NUMBER
)
3469 if (swap_uint32(magic
) == MAGIC_NUMBER
)
3475 fprintf(outfile
, "Data in %s is not a compiled PCRE regex\n", p
);
3482 /* We hide the byte-invert info for little and big endian tests. */
3483 fprintf(outfile
, "Compiled pattern%s loaded from %s\n",
3484 do_flip
&& (p
[-1] == '<') ? " (byte-inverted)" : "", p
);
3486 /* Now see if there is any following study data. */
3488 if (true_study_size
!= 0)
3490 pcre_study_data
*psd
;
3492 extra
= (pcre_extra
*)new_malloc(sizeof(pcre_extra
) + true_study_size
);
3493 extra
->flags
= PCRE_EXTRA_STUDY_DATA
;
3495 psd
= (pcre_study_data
*)(((char *)extra
) + sizeof(pcre_extra
));
3496 extra
->study_data
= psd
;
3498 if (fread(psd
, 1, true_study_size
, f
) != true_study_size
)
3501 fprintf(outfile
, "Failed to read data from %s\n", p
);
3504 PCRE_FREE_STUDY(extra
);
3510 fprintf(outfile
, "Study data loaded from %s\n", p
);
3511 do_study
= 1; /* To get the data output if requested */
3513 else fprintf(outfile
, "No study data\n");
3515 /* Flip the necessary bytes. */
3519 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc
, re
, extra
, NULL
);
3520 if (rc
== PCRE_ERROR_BADMODE
)
3522 pcre_uint16 flags_in_host_byte_order
;
3523 if (REAL_PCRE_MAGIC(re
) == MAGIC_NUMBER
)
3524 flags_in_host_byte_order
= REAL_PCRE_FLAGS(re
);
3526 flags_in_host_byte_order
= swap_uint16(REAL_PCRE_FLAGS(re
));
3527 /* Simulate the result of the function call below. */
3528 fprintf(outfile
, "Error %d from pcre%s_fullinfo(%d)\n", rc
,
3529 pcre_mode
== PCRE32_MODE
? "32" : pcre_mode
== PCRE16_MODE
? "16" : "",
3531 fprintf(outfile
, "Running in %d-bit mode but pattern was compiled in "
3532 "%d-bit mode\n", 8 * CHAR_SIZE
, 8 * (flags_in_host_byte_order
& PCRE_MODE_MASK
));
3539 /* Need to know if UTF-8 for printing data strings. */
3541 if (new_info(re
, NULL
, PCRE_INFO_OPTIONS
, &get_options
) < 0)
3547 use_utf
= (get_options
& PCRE_UTF8
) != 0;
3553 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3554 the pattern; if it isn't complete, read more. */
3558 if (isalnum(delimiter
) || delimiter
== '\\')
3560 fprintf(outfile
, "** Delimiter must not be alphanumeric or \\\n");
3565 poffset
= (int)(p
- buffer
);
3571 if (*pp
== '\\' && pp
[1] != 0) pp
++;
3572 else if (*pp
== delimiter
) break;
3575 if (*pp
!= 0) break;
3576 if ((pp
= extend_inputline(infile
, pp
, " > ")) == NULL
)
3578 fprintf(outfile
, "** Unexpected EOF\n");
3582 if (infile
!= stdin
) fprintf(outfile
, "%s", (char *)pp
);
3585 /* The buffer may have moved while being extended; reset the start of data
3586 pointer to the correct relative point in the buffer. */
3588 p
= buffer
+ poffset
;
3590 /* If the first character after the delimiter is backslash, make
3591 the pattern end with backslash. This is purely to provide a way
3592 of testing for the error message when a pattern ends with backslash. */
3594 if (pp
[1] == '\\') *pp
++ = '\\';
3596 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3600 strcpy((char *)pbuffer
, (char *)p
);
3602 /* Look for options after final delimiter */
3605 study_options
= force_study_options
;
3606 log_store
= showstore
; /* default from command line */
3612 case 'f': options
|= PCRE_FIRSTLINE
; break;
3613 case 'g': do_g
= 1; break;
3614 case 'i': options
|= PCRE_CASELESS
; break;
3615 case 'm': options
|= PCRE_MULTILINE
; break;
3616 case 's': options
|= PCRE_DOTALL
; break;
3617 case 'x': options
|= PCRE_EXTENDED
; break;
3620 if (do_showrest
) do_showcaprest
= 1; else do_showrest
= 1;
3623 case '=': do_allcaps
= 1; break;
3624 case 'A': options
|= PCRE_ANCHORED
; break;
3625 case 'B': do_debug
= 1; break;
3626 case 'C': options
|= PCRE_AUTO_CALLOUT
; break;
3627 case 'D': do_debug
= do_showinfo
= 1; break;
3628 case 'E': options
|= PCRE_DOLLAR_ENDONLY
; break;
3629 case 'F': do_flip
= 1; break;
3630 case 'G': do_G
= 1; break;
3631 case 'I': do_showinfo
= 1; break;
3632 case 'J': options
|= PCRE_DUPNAMES
; break;
3633 case 'K': do_mark
= 1; break;
3634 case 'M': log_store
= 1; break;
3635 case 'N': options
|= PCRE_NO_AUTO_CAPTURE
; break;
3637 #if !defined NOPOSIX
3638 case 'P': do_posix
= 1; break;
3653 study_options
|= PCRE_STUDY_EXTRA_NEEDED
;
3662 if (*pp
>= '1' && *pp
<= '7')
3663 study_options
|= jit_study_bits
[*pp
++ - '1'];
3665 study_options
|= jit_study_bits
[6];
3669 study_options
&= ~PCRE_STUDY_ALLJIT
;
3680 case 'U': options
|= PCRE_UNGREEDY
; break;
3681 case 'W': options
|= PCRE_UCP
; break;
3682 case 'X': options
|= PCRE_EXTRA
; break;
3683 case 'Y': options
|= PCRE_NO_START_OPTIMISE
; break;
3684 case 'Z': debug_lengths
= 0; break;
3685 case '8': options
|= PCRE_UTF8
; use_utf
= 1; break;
3686 case '?': options
|= PCRE_NO_UTF8_CHECK
; break;
3691 case '0': tables
= tables0
; break;
3692 case '1': tables
= tables1
; break;
3698 fprintf(outfile
, "** Missing table number after /T\n");
3702 fprintf(outfile
, "** Bad table number \"%c\" after /T\n", pp
[-1]);
3709 /* The '\r' test here is so that it works on Windows. */
3710 /* The '0' test is just in case this is an unterminated line. */
3711 while (*ppp
!= 0 && *ppp
!= '\n' && *ppp
!= '\r' && *ppp
!= ' ') ppp
++;
3713 if (setlocale(LC_CTYPE
, (const char *)pp
) == NULL
)
3715 fprintf(outfile
, "** Failed to set locale \"%s\"\n", pp
);
3719 tables
= PCRE_MAKETABLES
;
3725 while (*pp
!= 0) pp
++;
3726 while (isspace(pp
[-1])) pp
--;
3732 if (strncmpic(pp
, (pcre_uint8
*)"JS>", 3) == 0)
3734 options
|= PCRE_JAVASCRIPT_COMPAT
;
3739 int x
= check_newline(pp
, outfile
);
3740 if (x
== 0) goto SKIP_DATA
;
3742 while (*pp
++ != '>');
3747 case '\r': /* So that it works in Windows */
3753 fprintf(outfile
, "** Unknown option '%c'\n", pp
[-1]);
3758 /* Handle compiling via the POSIX interface, which doesn't support the
3759 timing, showing, or debugging options, nor the ability to pass over
3760 local character tables. Neither does it have 16-bit support. */
3762 #if !defined NOPOSIX
3763 if (posix
|| do_posix
)
3768 if ((options
& PCRE_CASELESS
) != 0) cflags
|= REG_ICASE
;
3769 if ((options
& PCRE_MULTILINE
) != 0) cflags
|= REG_NEWLINE
;
3770 if ((options
& PCRE_DOTALL
) != 0) cflags
|= REG_DOTALL
;
3771 if ((options
& PCRE_NO_AUTO_CAPTURE
) != 0) cflags
|= REG_NOSUB
;
3772 if ((options
& PCRE_UTF8
) != 0) cflags
|= REG_UTF8
;
3773 if ((options
& PCRE_UCP
) != 0) cflags
|= REG_UCP
;
3774 if ((options
& PCRE_UNGREEDY
) != 0) cflags
|= REG_UNGREEDY
;
3776 first_gotten_store
= 0;
3777 rc
= regcomp(&preg
, (char *)p
, cflags
);
3779 /* Compilation failed; go back for another re, skipping to blank line
3780 if non-interactive. */
3784 (void)regerror(rc
, &preg
, (char *)buffer
, buffer_size
);
3785 fprintf(outfile
, "Failed: POSIX code %d: %s\n", rc
, buffer
);
3790 /* Handle compiling via the native interface */
3793 #endif /* !defined NOPOSIX */
3796 /* In 16- or 32-bit mode, convert the input. */
3798 #ifdef SUPPORT_PCRE16
3799 if (pcre_mode
== PCRE16_MODE
)
3801 switch(to16(FALSE
, p
, options
& PCRE_UTF8
, (int)strlen((char *)p
)))
3804 fprintf(outfile
, "**Failed: invalid UTF-8 string cannot be "
3805 "converted to UTF-16\n");
3809 fprintf(outfile
, "**Failed: character value greater than 0x10ffff "
3810 "cannot be converted to UTF-16\n");
3813 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3814 fprintf(outfile
, "**Failed: character value greater than 0xffff "
3815 "cannot be converted to 16-bit in non-UTF mode\n");
3821 p
= (pcre_uint8
*)buffer16
;
3825 #ifdef SUPPORT_PCRE32
3826 if (pcre_mode
== PCRE32_MODE
)
3828 switch(to32(FALSE
, p
, options
& PCRE_UTF32
, (int)strlen((char *)p
)))
3831 fprintf(outfile
, "**Failed: invalid UTF-8 string cannot be "
3832 "converted to UTF-32\n");
3836 fprintf(outfile
, "**Failed: character value greater than 0x10ffff "
3837 "cannot be converted to UTF-32\n");
3841 fprintf(outfile
, "**Failed: character value is ill-formed UTF-32\n");
3847 p
= (pcre_uint8
*)buffer32
;
3851 /* Compile many times when timing */
3857 clock_t start_time
= clock();
3858 for (i
= 0; i
< timeit
; i
++)
3860 PCRE_COMPILE(re
, p
, options
, &error
, &erroroffset
, tables
);
3861 if (re
!= NULL
) free(re
);
3863 time_taken
= clock() - start_time
;
3864 fprintf(outfile
, "Compile time %.4f milliseconds\n",
3865 (((double)time_taken
* 1000.0) / (double)timeit
) /
3866 (double)CLOCKS_PER_SEC
);
3869 first_gotten_store
= 0;
3870 PCRE_COMPILE(re
, p
, options
, &error
, &erroroffset
, tables
);
3872 /* Compilation failed; go back for another re, skipping to blank line
3873 if non-interactive. */
3877 fprintf(outfile
, "Failed: %s at offset %d\n", error
, erroroffset
);
3879 if (infile
!= stdin
)
3883 if (extend_inputline(infile
, buffer
, NULL
) == NULL
)
3888 len
= (int)strlen((char *)buffer
);
3889 while (len
> 0 && isspace(buffer
[len
-1])) len
--;
3890 if (len
== 0) break;
3892 fprintf(outfile
, "\n");
3897 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3898 within the regex; check for this so that we know how to process the data
3901 if (new_info(re
, NULL
, PCRE_INFO_OPTIONS
, &get_options
) < 0)
3903 if ((get_options
& PCRE_UTF8
) != 0) use_utf
= 1;
3905 /* Extract the size for possible writing before possibly flipping it,
3906 and remember the store that was got. */
3908 true_size
= REAL_PCRE_SIZE(re
);
3909 regex_gotten_store
= first_gotten_store
;
3911 /* Output code size information if requested */
3915 int name_count
, name_entry_size
, real_pcre_size
;
3917 new_info(re
, NULL
, PCRE_INFO_NAMECOUNT
, &name_count
);
3918 new_info(re
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &name_entry_size
);
3920 #ifdef SUPPORT_PCRE8
3921 if (REAL_PCRE_FLAGS(re
) & PCRE_MODE8
)
3922 real_pcre_size
= sizeof(real_pcre
);
3924 #ifdef SUPPORT_PCRE16
3925 if (REAL_PCRE_FLAGS(re
) & PCRE_MODE16
)
3926 real_pcre_size
= sizeof(real_pcre16
);
3928 #ifdef SUPPORT_PCRE32
3929 if (REAL_PCRE_FLAGS(re
) & PCRE_MODE32
)
3930 real_pcre_size
= sizeof(real_pcre32
);
3932 fprintf(outfile
, "Memory allocation (code space): %d\n",
3933 (int)(first_gotten_store
- real_pcre_size
- name_count
* name_entry_size
));
3936 /* If -s or /S was present, study the regex to generate additional info to
3937 help with the matching, unless the pattern has the SS option, which
3938 suppresses the effect of /S (used for a few test patterns where studying is
3941 if (do_study
|| (force_study
>= 0 && !no_force_study
))
3947 clock_t start_time
= clock();
3948 for (i
= 0; i
< timeit
; i
++)
3950 PCRE_STUDY(extra
, re
, study_options
, &error
);
3952 time_taken
= clock() - start_time
;
3955 PCRE_FREE_STUDY(extra
);
3957 fprintf(outfile
, " Study time %.4f milliseconds\n",
3958 (((double)time_taken
* 1000.0) / (double)timeit
) /
3959 (double)CLOCKS_PER_SEC
);
3961 PCRE_STUDY(extra
, re
, study_options
, &error
);
3963 fprintf(outfile
, "Failed to study: %s\n", error
);
3964 else if (extra
!= NULL
)
3966 true_study_size
= ((pcre_study_data
*)(extra
->study_data
))->size
;
3970 if (new_info(re
, extra
, PCRE_INFO_JITSIZE
, &jitsize
) == 0 &&
3972 fprintf(outfile
, "Memory allocation (JIT code): %d\n", (int)jitsize
);
3977 /* If /K was present, we set up for handling MARK data. */
3983 extra
= (pcre_extra
*)malloc(sizeof(pcre_extra
));
3986 extra
->mark
= &markptr
;
3987 extra
->flags
|= PCRE_EXTRA_MARK
;
3990 /* Extract and display information from the compiled data if required. */
3996 fprintf(outfile
, "------------------------------------------------------------------\n");
3997 PCRE_PRINTINT(re
, outfile
, debug_lengths
);
4000 /* We already have the options in get_options (see above) */
4004 unsigned long int all_options
;
4005 pcre_uint32 first_char
, need_char
;
4006 int count
, backrefmax
, first_char_set
, need_char_set
, okpartial
, jchanged
,
4007 hascrorlf
, maxlookbehind
;
4008 int nameentrysize
, namecount
;
4009 const pcre_uint8
*nametable
;
4011 if (new_info(re
, NULL
, PCRE_INFO_SIZE
, &size
) +
4012 new_info(re
, NULL
, PCRE_INFO_CAPTURECOUNT
, &count
) +
4013 new_info(re
, NULL
, PCRE_INFO_BACKREFMAX
, &backrefmax
) +
4014 new_info(re
, NULL
, PCRE_INFO_FIRSTCHARACTER
, &first_char
) +
4015 new_info(re
, NULL
, PCRE_INFO_FIRSTCHARACTERFLAGS
, &first_char_set
) +
4016 new_info(re
, NULL
, PCRE_INFO_REQUIREDCHAR
, &need_char
) +
4017 new_info(re
, NULL
, PCRE_INFO_REQUIREDCHARFLAGS
, &need_char_set
) +
4018 new_info(re
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &nameentrysize
) +
4019 new_info(re
, NULL
, PCRE_INFO_NAMECOUNT
, &namecount
) +
4020 new_info(re
, NULL
, PCRE_INFO_NAMETABLE
, (void *)&nametable
) +
4021 new_info(re
, NULL
, PCRE_INFO_OKPARTIAL
, &okpartial
) +
4022 new_info(re
, NULL
, PCRE_INFO_JCHANGED
, &jchanged
) +
4023 new_info(re
, NULL
, PCRE_INFO_HASCRORLF
, &hascrorlf
) +
4024 new_info(re
, NULL
, PCRE_INFO_MAXLOOKBEHIND
, &maxlookbehind
)
4028 if (size
!= regex_gotten_store
) fprintf(outfile
,
4029 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4030 (int)size
, (int)regex_gotten_store
);
4032 fprintf(outfile
, "Capturing subpattern count = %d\n", count
);
4034 fprintf(outfile
, "Max back reference = %d\n", backrefmax
);
4038 fprintf(outfile
, "Named capturing subpatterns:\n");
4039 while (namecount
-- > 0)
4041 int imm2_size
= pcre_mode
== PCRE8_MODE
? 2 : 1;
4042 int length
= (int)STRLEN(nametable
+ imm2_size
);
4043 fprintf(outfile
, " ");
4044 PCHARSV(nametable
, imm2_size
, length
, outfile
);
4045 while (length
++ < nameentrysize
- imm2_size
) putc(' ', outfile
);
4046 #ifdef SUPPORT_PCRE32
4047 if (pcre_mode
== PCRE32_MODE
)
4048 fprintf(outfile
, "%3d\n", (int)(((PCRE_SPTR32
)nametable
)[0]));
4050 #ifdef SUPPORT_PCRE16
4051 if (pcre_mode
== PCRE16_MODE
)
4052 fprintf(outfile
, "%3d\n", (int)(((PCRE_SPTR16
)nametable
)[0]));
4054 #ifdef SUPPORT_PCRE8
4055 if (pcre_mode
== PCRE8_MODE
)
4056 fprintf(outfile
, "%3d\n", ((int)nametable
[0] << 8) | (int)nametable
[1]);
4058 nametable
+= nameentrysize
* CHAR_SIZE
;
4062 if (!okpartial
) fprintf(outfile
, "Partial matching not supported\n");
4063 if (hascrorlf
) fprintf(outfile
, "Contains explicit CR or LF match\n");
4065 all_options
= REAL_PCRE_OPTIONS(re
);
4066 if (do_flip
) all_options
= swap_uint32(all_options
);
4068 if (get_options
== 0) fprintf(outfile
, "No options\n");
4069 else fprintf(outfile
, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4070 ((get_options
& PCRE_ANCHORED
) != 0)? " anchored" : "",
4071 ((get_options
& PCRE_CASELESS
) != 0)? " caseless" : "",
4072 ((get_options
& PCRE_EXTENDED
) != 0)? " extended" : "",
4073 ((get_options
& PCRE_MULTILINE
) != 0)? " multiline" : "",
4074 ((get_options
& PCRE_FIRSTLINE
) != 0)? " firstline" : "",
4075 ((get_options
& PCRE_DOTALL
) != 0)? " dotall" : "",
4076 ((get_options
& PCRE_BSR_ANYCRLF
) != 0)? " bsr_anycrlf" : "",
4077 ((get_options
& PCRE_BSR_UNICODE
) != 0)? " bsr_unicode" : "",
4078 ((get_options
& PCRE_DOLLAR_ENDONLY
) != 0)? " dollar_endonly" : "",
4079 ((get_options
& PCRE_EXTRA
) != 0)? " extra" : "",
4080 ((get_options
& PCRE_UNGREEDY
) != 0)? " ungreedy" : "",
4081 ((get_options
& PCRE_NO_AUTO_CAPTURE
) != 0)? " no_auto_capture" : "",
4082 ((get_options
& PCRE_UTF8
) != 0)? " utf" : "",
4083 ((get_options
& PCRE_UCP
) != 0)? " ucp" : "",
4084 ((get_options
& PCRE_NO_UTF8_CHECK
) != 0)? " no_utf_check" : "",
4085 ((get_options
& PCRE_NO_START_OPTIMIZE
) != 0)? " no_start_optimize" : "",
4086 ((get_options
& PCRE_DUPNAMES
) != 0)? " dupnames" : "");
4088 if (jchanged
) fprintf(outfile
, "Duplicate name status changes\n");
4090 switch (get_options
& PCRE_NEWLINE_BITS
)
4092 case PCRE_NEWLINE_CR
:
4093 fprintf(outfile
, "Forced newline sequence: CR\n");
4096 case PCRE_NEWLINE_LF
:
4097 fprintf(outfile
, "Forced newline sequence: LF\n");
4100 case PCRE_NEWLINE_CRLF
:
4101 fprintf(outfile
, "Forced newline sequence: CRLF\n");
4104 case PCRE_NEWLINE_ANYCRLF
:
4105 fprintf(outfile
, "Forced newline sequence: ANYCRLF\n");
4108 case PCRE_NEWLINE_ANY
:
4109 fprintf(outfile
, "Forced newline sequence: ANY\n");
4116 if (first_char_set
== 2)
4118 fprintf(outfile
, "First char at start or follows newline\n");
4120 else if (first_char_set
== 1)
4122 const char *caseless
=
4123 ((REAL_PCRE_FLAGS(re
) & PCRE_FCH_CASELESS
) == 0)?
4126 if (PRINTOK(first_char
))
4127 fprintf(outfile
, "First char = \'%c\'%s\n", first_char
, caseless
);
4130 fprintf(outfile
, "First char = ");
4131 pchar(first_char
, outfile
);
4132 fprintf(outfile
, "%s\n", caseless
);
4137 fprintf(outfile
, "No first char\n");
4140 if (need_char_set
== 0)
4142 fprintf(outfile
, "No need char\n");
4146 const char *caseless
=
4147 ((REAL_PCRE_FLAGS(re
) & PCRE_RCH_CASELESS
) == 0)?
4150 if (PRINTOK(need_char
))
4151 fprintf(outfile
, "Need char = \'%c\'%s\n", need_char
, caseless
);
4154 fprintf(outfile
, "Need char = ");
4155 pchar(need_char
, outfile
);
4156 fprintf(outfile
, "%s\n", caseless
);
4160 if (maxlookbehind
> 0)
4161 fprintf(outfile
, "Max lookbehind = %d\n", maxlookbehind
);
4163 /* Don't output study size; at present it is in any case a fixed
4164 value, but it varies, depending on the computer architecture, and
4165 so messes up the test suite. (And with the /F option, it might be
4166 flipped.) If study was forced by an external -s, don't show this
4167 information unless -i or -d was also present. This means that, except
4168 when auto-callouts are involved, the output from runs with and without
4169 -s should be identical. */
4171 if (do_study
|| (force_study
>= 0 && showinfo
&& !no_force_study
))
4174 fprintf(outfile
, "Study returned NULL\n");
4177 pcre_uint8
*start_bits
= NULL
;
4180 if (new_info(re
, extra
, PCRE_INFO_MINLENGTH
, &minlength
) == 0)
4181 fprintf(outfile
, "Subject length lower bound = %d\n", minlength
);
4183 if (new_info(re
, extra
, PCRE_INFO_FIRSTTABLE
, &start_bits
) == 0)
4185 if (start_bits
== NULL
)
4186 fprintf(outfile
, "No set of starting bytes\n");
4191 fprintf(outfile
, "Starting byte set: ");
4192 for (i
= 0; i
< 256; i
++)
4194 if ((start_bits
[i
/8] & (1<<(i
&7))) != 0)
4198 fprintf(outfile
, "\n ");
4201 if (PRINTOK(i
) && i
!= ' ')
4203 fprintf(outfile
, "%c ", i
);
4208 fprintf(outfile
, "\\x%02x ", i
);
4213 fprintf(outfile
, "\n");
4218 /* Show this only if the JIT was set by /S, not by -s. */
4220 if ((study_options
& PCRE_STUDY_ALLJIT
) != 0 &&
4221 (force_study_options
& PCRE_STUDY_ALLJIT
) == 0)
4224 if (new_info(re
, extra
, PCRE_INFO_JIT
, &jit
) == 0)
4227 fprintf(outfile
, "JIT study was successful\n");
4230 fprintf(outfile
, "JIT study was not successful\n");
4232 fprintf(outfile
, "JIT support is not available in this version of PCRE\n");
4239 /* If the '>' option was present, we write out the regex to a file, and
4240 that is all. The first 8 bytes of the file are the regex length and then
4241 the study length, in big-endian order. */
4243 if (to_file
!= NULL
)
4245 FILE *f
= fopen((char *)to_file
, "wb");
4248 fprintf(outfile
, "Unable to open %s: %s\n", to_file
, strerror(errno
));
4254 if (do_flip
) regexflip(re
, extra
);
4255 sbuf
[0] = (pcre_uint8
)((true_size
>> 24) & 255);
4256 sbuf
[1] = (pcre_uint8
)((true_size
>> 16) & 255);
4257 sbuf
[2] = (pcre_uint8
)((true_size
>> 8) & 255);
4258 sbuf
[3] = (pcre_uint8
)((true_size
) & 255);
4259 sbuf
[4] = (pcre_uint8
)((true_study_size
>> 24) & 255);
4260 sbuf
[5] = (pcre_uint8
)((true_study_size
>> 16) & 255);
4261 sbuf
[6] = (pcre_uint8
)((true_study_size
>> 8) & 255);
4262 sbuf
[7] = (pcre_uint8
)((true_study_size
) & 255);
4264 if (fwrite(sbuf
, 1, 8, f
) < 8 ||
4265 fwrite(re
, 1, true_size
, f
) < true_size
)
4267 fprintf(outfile
, "Write error on %s: %s\n", to_file
, strerror(errno
));
4271 fprintf(outfile
, "Compiled pattern written to %s\n", to_file
);
4273 /* If there is study data, write it. */
4277 if (fwrite(extra
->study_data
, 1, true_study_size
, f
) <
4280 fprintf(outfile
, "Write error on %s: %s\n", to_file
,
4283 else fprintf(outfile
, "Study data written to %s\n", to_file
);
4292 PCRE_FREE_STUDY(extra
);
4296 new_free((void *)tables
);
4297 setlocale(LC_CTYPE
, "C");
4300 continue; /* With next regex */
4302 } /* End of non-POSIX compile */
4304 /* Read data lines and test them */
4308 #ifdef SUPPORT_PCRE8
4311 #ifdef SUPPORT_PCRE16
4314 #ifdef SUPPORT_PCRE32
4318 int *use_offsets
= offsets
;
4319 int use_size_offsets
= size_offsets
;
4320 int callout_data
= 0;
4321 int callout_data_set
= 0;
4324 int copystrings
= 0;
4325 int find_match_limit
= default_find_match_limit
;
4329 int start_offset
= 0;
4330 int start_offset_sign
= 1;
4337 #ifdef SUPPORT_PCRE32
4338 cn32ptr
= copynames
;
4341 #ifdef SUPPORT_PCRE16
4342 cn16ptr
= copynames16
;
4343 gn16ptr
= getnames16
;
4345 #ifdef SUPPORT_PCRE8
4346 cn8ptr
= copynames8
;
4350 SET_PCRE_CALLOUT(callout
);
4352 last_callout_mark
= NULL
;
4355 callout_fail_count
= 999999;
4356 callout_fail_id
= -1;
4360 if (extra
!= NULL
) extra
->flags
&=
4361 ~(PCRE_EXTRA_MATCH_LIMIT
|PCRE_EXTRA_MATCH_LIMIT_RECURSION
);
4366 if (extend_inputline(infile
, buffer
+ len
, "data> ") == NULL
)
4368 if (len
> 0) /* Reached EOF without hitting a newline */
4370 fprintf(outfile
, "\n");
4376 if (infile
!= stdin
) fprintf(outfile
, "%s", (char *)buffer
);
4377 len
= (int)strlen((char *)buffer
);
4378 if (buffer
[len
-1] == '\n') break;
4381 while (len
> 0 && isspace(buffer
[len
-1])) len
--;
4383 if (len
== 0) break;
4386 while (isspace(*p
)) p
++;
4389 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4390 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4397 for (q
= p
; n
> 0 && *q
; q
+= n
) n
= utf82ord(q
, &cc
);
4400 fprintf(outfile
, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4406 #ifdef SUPPORT_VALGRIND
4407 /* Mark the dbuffer as addressable but undefined again. */
4408 if (dbuffer
!= NULL
)
4410 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer
, dbuffer_size
* CHAR_SIZE
);
4414 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4415 the number of pcre_uchar units that will be needed. */
4416 if (dbuffer
== NULL
|| (size_t)len
>= dbuffer_size
)
4419 dbuffer
= (pcre_uint8
*)realloc(dbuffer
, dbuffer_size
* CHAR_SIZE
);
4420 if (dbuffer
== NULL
)
4422 fprintf(stderr
, "pcretest: malloc(%d) failed\n", (int)dbuffer_size
);
4427 #ifdef SUPPORT_PCRE8
4428 q8
= (pcre_uint8
*) dbuffer
;
4430 #ifdef SUPPORT_PCRE16
4431 q16
= (pcre_uint16
*) dbuffer
;
4433 #ifdef SUPPORT_PCRE32
4434 q32
= (pcre_uint32
*) dbuffer
;
4437 while ((c
= *p
++) != 0)
4442 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4443 In non-UTF mode, allow the value of the byte to fall through to later,
4444 where values greater than 127 are turned into UTF-8 when running in
4445 16-bit or 32-bit mode. */
4450 if (use_utf
&& HASUTF8EXTRALEN(c
)) { GETUTF8INC(c
, p
); }
4454 /* Handle backslash escapes */
4456 else switch ((c
= *p
++))
4458 case 'a': c
= 7; break;
4459 case 'b': c
= '\b'; break;
4460 case 'e': c
= 27; break;
4461 case 'f': c
= '\f'; break;
4462 case 'n': c
= '\n'; break;
4463 case 'r': c
= '\r'; break;
4464 case 't': c
= '\t'; break;
4465 case 'v': c
= '\v'; break;
4467 case '0': case '1': case '2': case '3':
4468 case '4': case '5': case '6': case '7':
4470 while (i
++ < 2 && isdigit(*p
) && *p
!= '8' && *p
!= '9')
4471 c
= c
* 8 + *p
++ - '0';
4480 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4481 when isxdigit() is a macro that refers to its argument more than
4482 once. This is banned by the C Standard, but apparently happens in at
4483 least one MacOS environment. */
4485 for (pt
++; isxdigit(*pt
); pt
++)
4488 fprintf(outfile
, "** Too many hex digits in \\x{...} item; "
4489 "using only the first eight.\n");
4490 else c
= c
* 16 + tolower(*pt
) - ((isdigit(*pt
))? '0' : 'a' - 10);
4497 /* Not correct form for \x{...}; fall through */
4500 /* \x without {} always defines just one byte in 8-bit mode. This
4501 allows UTF-8 characters to be constructed byte by byte, and also allows
4502 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4503 Otherwise, pass it down to later code so that it can be turned into
4504 UTF-8 when running in 16/32-bit mode. */
4507 while (i
++ < 2 && isxdigit(*p
))
4509 c
= c
* 16 + tolower(*p
) - ((isdigit(*p
))? '0' : 'a' - 10);
4512 #if !defined NOUTF && defined SUPPORT_PCRE8
4513 if (use_utf
&& (pcre_mode
== PCRE8_MODE
))
4521 case 0: /* \ followed by EOF allows for an empty line */
4528 start_offset_sign
= -1;
4531 while(isdigit(*p
)) start_offset
= start_offset
* 10 + *p
++ - '0';
4532 start_offset
*= start_offset_sign
;
4535 case 'A': /* Option setting */
4536 options
|= PCRE_ANCHORED
;
4540 options
|= PCRE_NOTBOL
;
4544 if (isdigit(*p
)) /* Set copy string */
4546 while(isdigit(*p
)) n
= n
* 10 + *p
++ - '0';
4547 copystrings
|= 1 << n
;
4549 else if (isalnum(*p
))
4551 READ_CAPTURE_NAME(p
, &cn8ptr
, &cn16ptr
, &cn32ptr
, re
);
4560 SET_PCRE_CALLOUT(NULL
);
4565 callout_fail_id
= 0;
4568 callout_fail_id
= callout_fail_id
* 10 + *p
++ - '0';
4569 callout_fail_count
= 0;
4574 callout_fail_count
= callout_fail_count
* 10 + *p
++ - '0';
4581 if (*(++p
) == '-') { sign
= -1; p
++; }
4583 callout_data
= callout_data
* 10 + *p
++ - '0';
4584 callout_data
*= sign
;
4585 callout_data_set
= 1;
4591 #if !defined NOPOSIX
4592 if (posix
|| do_posix
)
4593 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4602 options
|= PCRE_DFA_SHORTEST
;
4609 while(isdigit(*p
)) n
= n
* 10 + *p
++ - '0';
4610 getstrings
|= 1 << n
;
4612 else if (isalnum(*p
))
4614 READ_CAPTURE_NAME(p
, &gn8ptr
, &gn16ptr
, &gn32ptr
, re
);
4619 while(isdigit(*p
)) n
= n
* 10 + *p
++ - '0';
4621 && (extra
->flags
& PCRE_EXTRA_EXECUTABLE_JIT
) != 0
4622 && extra
->executable_jit
!= NULL
)
4624 if (jit_stack
!= NULL
) { PCRE_JIT_STACK_FREE(jit_stack
); }
4625 jit_stack
= PCRE_JIT_STACK_ALLOC(1, n
* 1024);
4626 PCRE_ASSIGN_JIT_STACK(extra
, jit_callback
, jit_stack
);
4635 find_match_limit
= 1;
4639 if ((options
& PCRE_NOTEMPTY
) != 0)
4640 options
= (options
& ~PCRE_NOTEMPTY
) | PCRE_NOTEMPTY_ATSTART
;
4642 options
|= PCRE_NOTEMPTY
;
4646 while(isdigit(*p
)) n
= n
* 10 + *p
++ - '0';
4647 if (n
> size_offsets_max
)
4649 size_offsets_max
= n
;
4651 use_offsets
= offsets
= (int *)malloc(size_offsets_max
* sizeof(int));
4652 if (offsets
== NULL
)
4654 printf("** Failed to get %d bytes of memory for offsets vector\n",
4655 (int)(size_offsets_max
* sizeof(int)));
4660 use_size_offsets
= n
;
4661 if (n
== 0) use_offsets
= NULL
; /* Ensures it can't write to it */
4662 else use_offsets
= offsets
+ size_offsets_max
- n
; /* To catch overruns */
4666 options
|= ((options
& PCRE_PARTIAL_SOFT
) == 0)?
4667 PCRE_PARTIAL_SOFT
: PCRE_PARTIAL_HARD
;
4671 while(isdigit(*p
)) n
= n
* 10 + *p
++ - '0';
4674 extra
= (pcre_extra
*)malloc(sizeof(pcre_extra
));
4677 extra
->flags
|= PCRE_EXTRA_MATCH_LIMIT_RECURSION
;
4678 extra
->match_limit_recursion
= n
;
4682 while(isdigit(*p
)) n
= n
* 10 + *p
++ - '0';
4685 extra
= (pcre_extra
*)malloc(sizeof(pcre_extra
));
4688 extra
->flags
|= PCRE_EXTRA_MATCH_LIMIT
;
4689 extra
->match_limit
= n
;
4694 options
|= PCRE_DFA_RESTART
;
4703 options
|= PCRE_NO_START_OPTIMIZE
;
4707 options
|= PCRE_NOTEOL
;
4711 options
|= PCRE_NO_UTF8_CHECK
;
4716 int x
= check_newline(p
, outfile
);
4717 if (x
== 0) goto NEXT_DATA
;
4719 while (*p
++ != '>');
4724 /* We now have a character value in c that may be greater than 255.
4725 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4726 than 127 in UTF mode must have come from \x{...} or octal constructs
4727 because values from \x.. get this far only in non-UTF mode. */
4729 #ifdef SUPPORT_PCRE8
4730 if (pcre_mode
== PCRE8_MODE
)
4737 fprintf(outfile
, "** Character \\x{%x} is greater than 0x7fffffff "
4738 "and so cannot be converted to UTF-8\n", c
);
4741 q8
+= ord2utf8(c
, q8
);
4748 fprintf(outfile
, "** Character \\x{%x} is greater than 255 "
4749 "and UTF-8 mode is not enabled.\n", c
);
4750 fprintf(outfile
, "** Truncation will probably give the wrong "
4757 #ifdef SUPPORT_PCRE16
4758 if (pcre_mode
== PCRE16_MODE
)
4765 fprintf(outfile
, "** Failed: character \\x{%x} is greater than "
4766 "0x10ffff and so cannot be converted to UTF-16\n", c
);
4769 else if (c
>= 0x10000u
)
4772 *q16
++ = 0xD800 | (c
>> 10);
4773 *q16
++ = 0xDC00 | (c
& 0x3ff);
4783 fprintf(outfile
, "** Character \\x{%x} is greater than 0xffff "
4784 "and UTF-16 mode is not enabled.\n", c
);
4785 fprintf(outfile
, "** Truncation will probably give the wrong "
4793 #ifdef SUPPORT_PCRE32
4794 if (pcre_mode
== PCRE32_MODE
)
4802 /* Reached end of subject string */
4804 #ifdef SUPPORT_PCRE8
4805 if (pcre_mode
== PCRE8_MODE
)
4808 len
= (int)(q8
- (pcre_uint8
*)dbuffer
);
4811 #ifdef SUPPORT_PCRE16
4812 if (pcre_mode
== PCRE16_MODE
)
4815 len
= (int)(q16
- (pcre_uint16
*)dbuffer
);
4818 #ifdef SUPPORT_PCRE32
4819 if (pcre_mode
== PCRE32_MODE
)
4822 len
= (int)(q32
- (pcre_uint32
*)dbuffer
);
4826 /* If we're compiling with explicit valgrind support, mark the data from after
4827 its end to the end of the buffer as unaddressable, so that a read over the end
4828 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4829 If we're not building with valgrind support, at least move the data to the end
4830 of the buffer so that a read over the end might cause a crash.
4831 If we are using the POSIX interface, we must include the terminating zero. */
4835 #if !defined NOPOSIX
4836 if (posix
|| do_posix
)
4838 #ifdef SUPPORT_VALGRIND
4839 VALGRIND_MAKE_MEM_NOACCESS(dbuffer
+ len
+ 1, dbuffer_size
- (len
+ 1));
4841 memmove(bptr
+ dbuffer_size
- len
- 1, bptr
, len
+ 1);
4842 bptr
+= dbuffer_size
- len
- 1;
4848 #ifdef SUPPORT_VALGRIND
4849 VALGRIND_MAKE_MEM_NOACCESS(dbuffer
+ len
* CHAR_SIZE
, (dbuffer_size
- len
) * CHAR_SIZE
);
4851 bptr
= memmove(bptr
+ (dbuffer_size
- len
) * CHAR_SIZE
, bptr
, len
* CHAR_SIZE
);
4855 if ((all_use_dfa
|| use_dfa
) && find_match_limit
)
4857 printf("**Match limit not relevant for DFA matching: ignored\n");
4858 find_match_limit
= 0;
4861 /* Handle matching via the POSIX interface, which does not
4862 support timing or playing with the match limit or callout data. */
4864 #if !defined NOPOSIX
4865 if (posix
|| do_posix
)
4869 regmatch_t
*pmatch
= NULL
;
4870 if (use_size_offsets
> 0)
4871 pmatch
= (regmatch_t
*)malloc(sizeof(regmatch_t
) * use_size_offsets
);
4872 if ((options
& PCRE_NOTBOL
) != 0) eflags
|= REG_NOTBOL
;
4873 if ((options
& PCRE_NOTEOL
) != 0) eflags
|= REG_NOTEOL
;
4874 if ((options
& PCRE_NOTEMPTY
) != 0) eflags
|= REG_NOTEMPTY
;
4876 rc
= regexec(&preg
, (const char *)bptr
, use_size_offsets
, pmatch
, eflags
);
4880 (void)regerror(rc
, &preg
, (char *)buffer
, buffer_size
);
4881 fprintf(outfile
, "No match: POSIX code %d: %s\n", rc
, buffer
);
4883 else if ((REAL_PCRE_OPTIONS(preg
.re_pcre
) & PCRE_NO_AUTO_CAPTURE
) != 0)
4885 fprintf(outfile
, "Matched with REG_NOSUB\n");
4890 for (i
= 0; i
< (size_t)use_size_offsets
; i
++)
4892 if (pmatch
[i
].rm_so
>= 0)
4894 fprintf(outfile
, "%2d: ", (int)i
);
4895 PCHARSV(dbuffer
, pmatch
[i
].rm_so
,
4896 pmatch
[i
].rm_eo
- pmatch
[i
].rm_so
, outfile
);
4897 fprintf(outfile
, "\n");
4898 if (do_showcaprest
|| (i
== 0 && do_showrest
))
4900 fprintf(outfile
, "%2d+ ", (int)i
);
4901 PCHARSV(dbuffer
, pmatch
[i
].rm_eo
, len
- pmatch
[i
].rm_eo
,
4903 fprintf(outfile
, "\n");
4912 #endif /* !defined NOPOSIX */
4914 /* Handle matching via the native interface - repeats for /g and /G */
4916 /* Ensure that there is a JIT callback if we want to verify that JIT was
4917 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4919 if (verify_jit
&& jit_stack
== NULL
&& extra
!= NULL
)
4920 { PCRE_ASSIGN_JIT_STACK(extra
, jit_callback
, jit_stack
); }
4922 for (;; gmatched
++) /* Loop for /g or /G */
4925 jit_was_used
= FALSE
;
4931 clock_t start_time
= clock();
4934 if (all_use_dfa
|| use_dfa
)
4936 if ((options
& PCRE_DFA_RESTART
) != 0)
4938 fprintf(outfile
, "Timing DFA restarts is not supported\n");
4941 if (dfa_workspace
== NULL
)
4942 dfa_workspace
= (int *)malloc(DFA_WS_DIMENSION
*sizeof(int));
4943 for (i
= 0; i
< timeitm
; i
++)
4945 PCRE_DFA_EXEC(count
, re
, extra
, bptr
, len
, start_offset
,
4946 (options
| g_notempty
), use_offsets
, use_size_offsets
,
4947 dfa_workspace
, DFA_WS_DIMENSION
);
4953 for (i
= 0; i
< timeitm
; i
++)
4955 PCRE_EXEC(count
, re
, extra
, bptr
, len
, start_offset
,
4956 (options
| g_notempty
), use_offsets
, use_size_offsets
);
4958 time_taken
= clock() - start_time
;
4959 fprintf(outfile
, "Execute time %.4f milliseconds\n",
4960 (((double)time_taken
* 1000.0) / (double)timeitm
) /
4961 (double)CLOCKS_PER_SEC
);
4964 /* If find_match_limit is set, we want to do repeated matches with
4965 varying limits in order to find the minimum value for the match limit and
4966 for the recursion limit. The match limits are relevant only to the normal
4967 running of pcre_exec(), so disable the JIT optimization. This makes it
4968 possible to run the same set of tests with and without JIT externally
4971 if (find_match_limit
)
4973 if (extra
!= NULL
) { PCRE_FREE_STUDY(extra
); }
4974 extra
= (pcre_extra
*)malloc(sizeof(pcre_extra
));
4977 (void)check_match_limit(re
, extra
, bptr
, len
, start_offset
,
4978 options
|g_notempty
, use_offsets
, use_size_offsets
,
4979 PCRE_EXTRA_MATCH_LIMIT
, &(extra
->match_limit
),
4980 PCRE_ERROR_MATCHLIMIT
, "match()");
4982 count
= check_match_limit(re
, extra
, bptr
, len
, start_offset
,
4983 options
|g_notempty
, use_offsets
, use_size_offsets
,
4984 PCRE_EXTRA_MATCH_LIMIT_RECURSION
, &(extra
->match_limit_recursion
),
4985 PCRE_ERROR_RECURSIONLIMIT
, "match() recursion");
4988 /* If callout_data is set, use the interface with additional data */
4990 else if (callout_data_set
)
4994 extra
= (pcre_extra
*)malloc(sizeof(pcre_extra
));
4997 extra
->flags
|= PCRE_EXTRA_CALLOUT_DATA
;
4998 extra
->callout_data
= &callout_data
;
4999 PCRE_EXEC(count
, re
, extra
, bptr
, len
, start_offset
,
5000 options
| g_notempty
, use_offsets
, use_size_offsets
);
5001 extra
->flags
&= ~PCRE_EXTRA_CALLOUT_DATA
;
5004 /* The normal case is just to do the match once, with the default
5005 value of match_limit. */
5008 else if (all_use_dfa
|| use_dfa
)
5010 if (dfa_workspace
== NULL
)
5011 dfa_workspace
= (int *)malloc(DFA_WS_DIMENSION
*sizeof(int));
5012 if (dfa_matched
++ == 0)
5013 dfa_workspace
[0] = -1; /* To catch bad restart */
5014 PCRE_DFA_EXEC(count
, re
, extra
, bptr
, len
, start_offset
,
5015 (options
| g_notempty
), use_offsets
, use_size_offsets
, dfa_workspace
,
5019 fprintf(outfile
, "Matched, but too many subsidiary matches\n");
5020 count
= use_size_offsets
/2;
5027 PCRE_EXEC(count
, re
, extra
, bptr
, len
, start_offset
,
5028 options
| g_notempty
, use_offsets
, use_size_offsets
);
5031 fprintf(outfile
, "Matched, but too many substrings\n");
5032 count
= use_size_offsets
/3;
5041 void *cnptr
, *gnptr
;
5044 if (all_use_dfa
|| use_dfa
) maxcount
= use_size_offsets
/2; else
5046 maxcount
= use_size_offsets
/3;
5048 /* This is a check against a lunatic return value. */
5050 if (count
> maxcount
)
5053 "** PCRE error: returned count %d is too big for offset size %d\n",
5054 count
, use_size_offsets
);
5055 count
= use_size_offsets
/3;
5058 fprintf(outfile
, "** /%c loop abandoned\n", do_g
? 'g' : 'G');
5059 do_g
= do_G
= FALSE
; /* Break g/G loop */
5063 /* do_allcaps requests showing of all captures in the pattern, to check
5064 unset ones at the end. */
5068 if (new_info(re
, NULL
, PCRE_INFO_CAPTURECOUNT
, &count
) < 0)
5070 count
++; /* Allow for full match */
5071 if (count
* 2 > use_size_offsets
) count
= use_size_offsets
/2;
5074 /* Output the captured substrings */
5076 for (i
= 0; i
< count
* 2; i
+= 2)
5078 if (use_offsets
[i
] < 0)
5080 if (use_offsets
[i
] != -1)
5081 fprintf(outfile
, "ERROR: bad negative value %d for offset %d\n",
5083 if (use_offsets
[i
+1] != -1)
5084 fprintf(outfile
, "ERROR: bad negative value %d for offset %d\n",
5085 use_offsets
[i
+1], i
+1);
5086 fprintf(outfile
, "%2d: <unset>\n", i
/2);
5090 fprintf(outfile
, "%2d: ", i
/2);
5091 PCHARSV(bptr
, use_offsets
[i
],
5092 use_offsets
[i
+1] - use_offsets
[i
], outfile
);
5093 if (verify_jit
&& jit_was_used
) fprintf(outfile
, " (JIT)");
5094 fprintf(outfile
, "\n");
5095 if (do_showcaprest
|| (i
== 0 && do_showrest
))
5097 fprintf(outfile
, "%2d+ ", i
/2);
5098 PCHARSV(bptr
, use_offsets
[i
+1], len
- use_offsets
[i
+1],
5100 fprintf(outfile
, "\n");
5105 if (markptr
!= NULL
)
5107 fprintf(outfile
, "MK: ");
5108 PCHARSV(markptr
, 0, -1, outfile
);
5109 fprintf(outfile
, "\n");
5112 for (i
= 0; i
< 32; i
++)
5114 if ((copystrings
& (1 << i
)) != 0)
5117 char copybuffer
[256];
5118 PCRE_COPY_SUBSTRING(rc
, bptr
, use_offsets
, count
, i
,
5119 copybuffer
, sizeof(copybuffer
));
5121 fprintf(outfile
, "copy substring %d failed %d\n", i
, rc
);
5124 fprintf(outfile
, "%2dC ", i
);
5125 PCHARSV(copybuffer
, 0, rc
, outfile
);
5126 fprintf(outfile
, " (%d)\n", rc
);
5135 char copybuffer
[256];
5137 #ifdef SUPPORT_PCRE32
5138 if (pcre_mode
== PCRE32_MODE
)
5140 if (*(pcre_uint32
*)cnptr
== 0) break;
5143 #ifdef SUPPORT_PCRE16
5144 if (pcre_mode
== PCRE16_MODE
)
5146 if (*(pcre_uint16
*)cnptr
== 0) break;
5149 #ifdef SUPPORT_PCRE8
5150 if (pcre_mode
== PCRE8_MODE
)
5152 if (*(pcre_uint8
*)cnptr
== 0) break;
5156 PCRE_COPY_NAMED_SUBSTRING(rc
, re
, bptr
, use_offsets
, count
,
5157 cnptr
, copybuffer
, sizeof(copybuffer
));
5161 fprintf(outfile
, "copy substring ");
5162 PCHARSV(cnptr
, 0, -1, outfile
);
5163 fprintf(outfile
, " failed %d\n", rc
);
5167 fprintf(outfile
, " C ");
5168 PCHARSV(copybuffer
, 0, rc
, outfile
);
5169 fprintf(outfile
, " (%d) ", rc
);
5170 PCHARSV(cnptr
, 0, -1, outfile
);
5171 putc('\n', outfile
);
5174 cnptr
= (char *)cnptr
+ (STRLEN(cnptr
) + 1) * CHAR_SIZE
;
5177 for (i
= 0; i
< 32; i
++)
5179 if ((getstrings
& (1 << i
)) != 0)
5182 const char *substring
;
5183 PCRE_GET_SUBSTRING(rc
, bptr
, use_offsets
, count
, i
, &substring
);
5185 fprintf(outfile
, "get substring %d failed %d\n", i
, rc
);
5188 fprintf(outfile
, "%2dG ", i
);
5189 PCHARSV(substring
, 0, rc
, outfile
);
5190 fprintf(outfile
, " (%d)\n", rc
);
5191 PCRE_FREE_SUBSTRING(substring
);
5200 const char *substring
;
5202 #ifdef SUPPORT_PCRE32
5203 if (pcre_mode
== PCRE32_MODE
)
5205 if (*(pcre_uint32
*)gnptr
== 0) break;
5208 #ifdef SUPPORT_PCRE16
5209 if (pcre_mode
== PCRE16_MODE
)
5211 if (*(pcre_uint16
*)gnptr
== 0) break;
5214 #ifdef SUPPORT_PCRE8
5215 if (pcre_mode
== PCRE8_MODE
)
5217 if (*(pcre_uint8
*)gnptr
== 0) break;
5221 PCRE_GET_NAMED_SUBSTRING(rc
, re
, bptr
, use_offsets
, count
,
5225 fprintf(outfile
, "get substring ");
5226 PCHARSV(gnptr
, 0, -1, outfile
);
5227 fprintf(outfile
, " failed %d\n", rc
);
5231 fprintf(outfile
, " G ");
5232 PCHARSV(substring
, 0, rc
, outfile
);
5233 fprintf(outfile
, " (%d) ", rc
);
5234 PCHARSV(gnptr
, 0, -1, outfile
);
5235 PCRE_FREE_SUBSTRING(substring
);
5236 putc('\n', outfile
);
5239 gnptr
= (char *)gnptr
+ (STRLEN(gnptr
) + 1) * CHAR_SIZE
;
5245 const char **stringlist
;
5246 PCRE_GET_SUBSTRING_LIST(rc
, bptr
, use_offsets
, count
, &stringlist
);
5248 fprintf(outfile
, "get substring list failed %d\n", rc
);
5251 for (i
= 0; i
< count
; i
++)
5253 fprintf(outfile
, "%2dL ", i
);
5254 PCHARSV(stringlist
[i
], 0, -1, outfile
);
5255 putc('\n', outfile
);
5257 if (stringlist
[i
] != NULL
)
5258 fprintf(outfile
, "string list not terminated by NULL\n");
5259 PCRE_FREE_SUBSTRING_LIST(stringlist
);
5264 /* There was a partial match */
5266 else if (count
== PCRE_ERROR_PARTIAL
)
5268 if (markptr
== NULL
) fprintf(outfile
, "Partial match");
5271 fprintf(outfile
, "Partial match, mark=");
5272 PCHARSV(markptr
, 0, -1, outfile
);
5274 if (use_size_offsets
> 1)
5276 fprintf(outfile
, ": ");
5277 PCHARSV(bptr
, use_offsets
[0], use_offsets
[1] - use_offsets
[0],
5280 if (verify_jit
&& jit_was_used
) fprintf(outfile
, " (JIT)");
5281 fprintf(outfile
, "\n");
5282 break; /* Out of the /g loop */
5285 /* Failed to match. If this is a /g or /G loop and we previously set
5286 g_notempty after a null match, this is not necessarily the end. We want
5287 to advance the start offset, and continue. We won't be at the end of the
5288 string - that was checked before setting g_notempty.
5290 Complication arises in the case when the newline convention is "any",
5291 "crlf", or "anycrlf". If the previous match was at the end of a line
5292 terminated by CRLF, an advance of one character just passes the \r,
5293 whereas we should prefer the longer newline sequence, as does the code in
5294 pcre_exec(). Fudge the offset value to achieve this. We check for a
5295 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5298 Otherwise, in the case of UTF-8 matching, the advance must be one
5299 character, not one byte. */
5303 if (g_notempty
!= 0)
5306 unsigned int obits
= REAL_PCRE_OPTIONS(re
);
5307 use_offsets
[0] = start_offset
;
5308 if ((obits
& PCRE_NEWLINE_BITS
) == 0)
5311 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE
, &d
);
5312 /* Note that these values are always the ASCII ones, even in
5313 EBCDIC environments. CR = 13, NL = 10. */
5314 obits
= (d
== 13)? PCRE_NEWLINE_CR
:
5315 (d
== 10)? PCRE_NEWLINE_LF
:
5316 (d
== (13<<8 | 10))? PCRE_NEWLINE_CRLF
:
5317 (d
== -2)? PCRE_NEWLINE_ANYCRLF
:
5318 (d
== -1)? PCRE_NEWLINE_ANY
: 0;
5320 if (((obits
& PCRE_NEWLINE_BITS
) == PCRE_NEWLINE_ANY
||
5321 (obits
& PCRE_NEWLINE_BITS
) == PCRE_NEWLINE_CRLF
||
5322 (obits
& PCRE_NEWLINE_BITS
) == PCRE_NEWLINE_ANYCRLF
)
5324 start_offset
< len
- 1 && (
5325 #ifdef SUPPORT_PCRE8
5326 (pcre_mode
== PCRE8_MODE
&&
5327 bptr
[start_offset
] == '\r' &&
5328 bptr
[start_offset
+ 1] == '\n') ||
5330 #ifdef SUPPORT_PCRE16
5331 (pcre_mode
== PCRE16_MODE
&&
5332 ((PCRE_SPTR16
)bptr
)[start_offset
] == '\r' &&
5333 ((PCRE_SPTR16
)bptr
)[start_offset
+ 1] == '\n') ||
5335 #ifdef SUPPORT_PCRE32
5336 (pcre_mode
== PCRE32_MODE
&&
5337 ((PCRE_SPTR32
)bptr
)[start_offset
] == '\r' &&
5338 ((PCRE_SPTR32
)bptr
)[start_offset
+ 1] == '\n') ||
5344 while (start_offset
+ onechar
< len
)
5346 if ((bptr
[start_offset
+onechar
] & 0xc0) != 0x80) break;
5350 use_offsets
[1] = start_offset
+ onechar
;
5356 case PCRE_ERROR_NOMATCH
:
5359 if (markptr
== NULL
)
5361 fprintf(outfile
, "No match");
5365 fprintf(outfile
, "No match, mark = ");
5366 PCHARSV(markptr
, 0, -1, outfile
);
5368 if (verify_jit
&& jit_was_used
) fprintf(outfile
, " (JIT)");
5369 putc('\n', outfile
);
5373 case PCRE_ERROR_BADUTF8
:
5374 case PCRE_ERROR_SHORTUTF8
:
5375 fprintf(outfile
, "Error %d (%s UTF-%d string)", count
,
5376 (count
== PCRE_ERROR_BADUTF8
)? "bad" : "short",
5378 if (use_size_offsets
>= 2)
5379 fprintf(outfile
, " offset=%d reason=%d", use_offsets
[0],
5381 fprintf(outfile
, "\n");
5384 case PCRE_ERROR_BADUTF8_OFFSET
:
5385 fprintf(outfile
, "Error %d (bad UTF-%d offset)\n", count
,
5391 (-count
) < (int)(sizeof(errtexts
)/sizeof(const char *)))
5392 fprintf(outfile
, "Error %d (%s)\n", count
, errtexts
[-count
]);
5394 fprintf(outfile
, "Error %d (Unexpected value)\n", count
);
5398 break; /* Out of the /g loop */
5402 /* If not /g or /G we are done */
5404 if (!do_g
&& !do_G
) break;
5406 /* If we have matched an empty string, first check to see if we are at
5407 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5408 Perl's /g option does. This turns out to be rather cunning. First we set
5409 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5410 same point. If this fails (picked up above) we advance to the next
5415 if (use_offsets
[0] == use_offsets
[1])
5417 if (use_offsets
[0] == len
) break;
5418 g_notempty
= PCRE_NOTEMPTY_ATSTART
| PCRE_ANCHORED
;
5421 /* For /g, update the start offset, leaving the rest alone */
5423 if (do_g
) start_offset
= use_offsets
[1];
5425 /* For /G, update the pointer and length */
5429 bptr
+= use_offsets
[1] * CHAR_SIZE
;
5430 len
-= use_offsets
[1];
5432 } /* End of loop for /g and /G */
5434 NEXT_DATA
: continue;
5435 } /* End of loop for data lines */
5439 #if !defined NOPOSIX
5440 if (posix
|| do_posix
) regfree(&preg
);
5443 if (re
!= NULL
) new_free(re
);
5446 PCRE_FREE_STUDY(extra
);
5450 new_free((void *)tables
);
5451 setlocale(LC_CTYPE
, "C");
5454 if (jit_stack
!= NULL
)
5456 PCRE_JIT_STACK_FREE(jit_stack
);
5461 if (infile
== stdin
) fprintf(outfile
, "\n");
5465 if (infile
!= NULL
&& infile
!= stdin
) fclose(infile
);
5466 if (outfile
!= NULL
&& outfile
!= stdout
) fclose(outfile
);
5473 #ifdef SUPPORT_PCRE16
5474 if (buffer16
!= NULL
) free(buffer16
);
5476 #ifdef SUPPORT_PCRE32
5477 if (buffer32
!= NULL
) free(buffer32
);
5481 if (dfa_workspace
!= NULL
)
5482 free(dfa_workspace
);
5488 /* End of pcretest.c */