usbmodeswitch: Updated to v.1.2.6 from shibby's branch.
[tomato.git] / release / src / router / pcre / pcretest.c
blob0b6c821aa7c830a73225838df4c55ddd9acf54c8
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
96 #ifndef isatty
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
100 #ifndef fileno
101 #define fileno _fileno
102 #endif
104 /* A user sent this fix for Borland Builder 5 under Windows. */
106 #ifdef __BORLANDC__
107 #define _setmode(handle, mode) setmode(handle, mode)
108 #endif
110 /* Not Windows */
112 #else
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
118 #else
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
121 #endif
122 #endif
/* In this program PRIV(name) expands to just name, with nothing added.
NOTE(review): presumably this is what renames the external symbols of the
library sources that are #included further down (pcre_tables.c, pcre_ucd.c);
confirm against pcre_internal.h. */
124 #define PRIV(name) name
126 /* We have to include pcre_internal.h because we need the internal info for
127 displaying the results of pcre_study() and we also need to know about the
128 internal macros, structures, and other internal data values; pcretest has
129 "inside information" compared to a program that strictly follows the PCRE API.
131 Although pcre_internal.h does itself include pcre.h, we explicitly include it
132 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
133 appropriately for an application, not for building PCRE. */
135 #include "pcre.h"
136 #include "pcre_internal.h"
138 /* The pcre_printint() function, which prints the internal form of a compiled
139 regex, is held in a separate file so that (a) it can be compiled in either
140 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
141 when that is compiled in debug mode. */
143 #ifdef SUPPORT_PCRE8
144 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
145 #endif
146 #ifdef SUPPORT_PCRE16
147 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
148 #endif
149 #ifdef SUPPORT_PCRE32
150 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
151 #endif
153 /* We need access to some of the data tables that PCRE uses. So as not to have
154 to keep two copies, we include the source files here, changing the names of the
155 external symbols to prevent clashes. */
157 #define PCRE_INCLUDED
159 #include "pcre_tables.c"
160 #include "pcre_ucd.c"
162 /* The definition of the macro PRINTABLE, which determines whether to print an
163 output character as-is or as a hex value when showing compiled patterns, is
164 the same as in the printint.src file. We use it here in cases when the locale
165 has not been explicitly changed, so as to get consistent output from systems
166 that differ in their output from isprint() even in the "C" locale. */
168 #ifdef EBCDIC
169 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
170 #else
171 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
172 #endif
174 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
176 /* Posix support is disabled in 16 or 32 bit only mode. */
177 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
178 #define NOPOSIX
179 #endif
181 /* It is possible to compile this test program without including support for
182 testing the POSIX interface, though this is not available via the standard
183 Makefile. */
185 #if !defined NOPOSIX
186 #include "pcreposix.h"
187 #endif
189 /* It is also possible, originally for the benefit of a version that was
190 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
191 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
192 automatically cut out the UTF support if PCRE is built without it. */
194 #ifndef SUPPORT_UTF
195 #ifndef NOUTF
196 #define NOUTF
197 #endif
198 #endif
200 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
201 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
202 only from one place and is handled differently). I couldn't dream up any way of
203 using a single macro to do this in a generic way, because of the many different
204 argument requirements. We know that at least one of SUPPORT_PCRE8, SUPPORT_PCRE16,
205 and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
206 use these in the definitions of generic macros.
208 **** Special note about the PCHARSxxx macros: the address of the string to be
209 printed is always given as two arguments: a base address followed by an offset.
210 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
211 offset is in units of this size. If the string were given as base+offset in one
212 argument, the casting might be incorrectly applied. */
214 #ifdef SUPPORT_PCRE8
216 #define PCHARS8(lv, p, offset, len, f) \
217 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
219 #define PCHARSV8(p, offset, len, f) \
220 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
222 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
223 p = read_capture_name8(p, cn8, re)
225 #define STRLEN8(p) ((int)strlen((char *)p))
227 #define SET_PCRE_CALLOUT8(callout) \
228 pcre_callout = callout
230 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
231 pcre_assign_jit_stack(extra, callback, userdata)
233 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
234 re = pcre_compile((char *)pat, options, error, erroffset, tables)
236 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
237 namesptr, cbuffer, size) \
238 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
239 (char *)namesptr, cbuffer, size)
241 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
242 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
244 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
245 offsets, size_offsets, workspace, size_workspace) \
246 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
247 offsets, size_offsets, workspace, size_workspace)
249 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
250 offsets, size_offsets) \
251 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
252 offsets, size_offsets)
254 #define PCRE_FREE_STUDY8(extra) \
255 pcre_free_study(extra)
257 #define PCRE_FREE_SUBSTRING8(substring) \
258 pcre_free_substring(substring)
260 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
261 pcre_free_substring_list(listptr)
263 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
264 getnamesptr, subsptr) \
265 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
266 (char *)getnamesptr, subsptr)
268 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
269 n = pcre_get_stringnumber(re, (char *)ptr)
271 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
272 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
274 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
275 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
277 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
278 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
280 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
281 pcre_printint(re, outfile, debug_lengths)
283 #define PCRE_STUDY8(extra, re, options, error) \
284 extra = pcre_study(re, options, error)
286 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
287 pcre_jit_stack_alloc(startsize, maxsize)
289 #define PCRE_JIT_STACK_FREE8(stack) \
290 pcre_jit_stack_free(stack)
/* Alias that lets the spelling "pcre8_maketables" resolve to pcre_maketables.
The two-mode PCRE_MAKETABLES macro builds its function names by pasting the
bit width after "pcre", which yields this name when the width is 8. */
292 #define pcre8_maketables pcre_maketables
294 #endif /* SUPPORT_PCRE8 */
296 /* -----------------------------------------------------------*/
298 #ifdef SUPPORT_PCRE16
300 #define PCHARS16(lv, p, offset, len, f) \
301 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
303 #define PCHARSV16(p, offset, len, f) \
304 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
306 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
307 p = read_capture_name16(p, cn16, re)
309 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
311 #define SET_PCRE_CALLOUT16(callout) \
312 pcre16_callout = (int (*)(pcre16_callout_block *))callout
314 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
315 pcre16_assign_jit_stack((pcre16_extra *)extra, \
316 (pcre16_jit_callback)callback, userdata)
318 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
319 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
320 tables)
322 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
323 namesptr, cbuffer, size) \
324 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
325 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
327 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
328 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
329 (PCRE_UCHAR16 *)cbuffer, size/2)
331 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
332 offsets, size_offsets, workspace, size_workspace) \
333 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
334 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
335 workspace, size_workspace)
337 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
338 offsets, size_offsets) \
339 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
340 len, start_offset, options, offsets, size_offsets)
342 #define PCRE_FREE_STUDY16(extra) \
343 pcre16_free_study((pcre16_extra *)extra)
345 #define PCRE_FREE_SUBSTRING16(substring) \
346 pcre16_free_substring((PCRE_SPTR16)substring)
348 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
349 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
351 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
352 getnamesptr, subsptr) \
353 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
354 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
356 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
357 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
359 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
360 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
361 (PCRE_SPTR16 *)(void*)subsptr)
363 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
364 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
365 (PCRE_SPTR16 **)(void*)listptr)
367 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
368 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
369 tables)
371 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
372 pcre16_printint(re, outfile, debug_lengths)
374 #define PCRE_STUDY16(extra, re, options, error) \
375 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
377 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
378 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
380 #define PCRE_JIT_STACK_FREE16(stack) \
381 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
383 #endif /* SUPPORT_PCRE16 */
385 /* -----------------------------------------------------------*/
387 #ifdef SUPPORT_PCRE32
389 #define PCHARS32(lv, p, offset, len, f) \
390 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
392 #define PCHARSV32(p, offset, len, f) \
393 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
395 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396 p = read_capture_name32(p, cn32, re)
398 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
400 #define SET_PCRE_CALLOUT32(callout) \
401 pcre32_callout = (int (*)(pcre32_callout_block *))callout
403 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
404 pcre32_assign_jit_stack((pcre32_extra *)extra, \
405 (pcre32_jit_callback)callback, userdata)
407 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
408 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
409 tables)
/* 32-bit wrapper for pcre32_copy_named_substring(); the result code is stored
in rc. The size argument is taken to be the capacity of cbuffer in bytes, as in
the 8-bit variant, which passes it on unchanged. It is therefore divided by 4
(not by 2, as for 16-bit data) to give the capacity in 32-bit code units, so
that the library is never told the buffer is larger than it really is. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, (size)/4)
/* 32-bit wrapper for pcre32_copy_substring(); the result code is stored in rc.
The size argument is taken to be the capacity of cbuffer in bytes, as in the
8-bit variant, which passes it on unchanged. It is therefore divided by 4 (not
by 2, as for 16-bit data) to give the capacity in 32-bit code units. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, (size)/4)
420 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
421 offsets, size_offsets, workspace, size_workspace) \
422 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
423 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
424 workspace, size_workspace)
426 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
427 offsets, size_offsets) \
428 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
429 len, start_offset, options, offsets, size_offsets)
431 #define PCRE_FREE_STUDY32(extra) \
432 pcre32_free_study((pcre32_extra *)extra)
434 #define PCRE_FREE_SUBSTRING32(substring) \
435 pcre32_free_substring((PCRE_SPTR32)substring)
437 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
438 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
440 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
441 getnamesptr, subsptr) \
442 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
443 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
445 #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
446 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
448 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
449 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
450 (PCRE_SPTR32 *)(void*)subsptr)
452 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
453 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
454 (PCRE_SPTR32 **)(void*)listptr)
456 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
457 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
458 tables)
460 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
461 pcre32_printint(re, outfile, debug_lengths)
463 #define PCRE_STUDY32(extra, re, options, error) \
464 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
466 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
467 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
469 #define PCRE_JIT_STACK_FREE32(stack) \
470 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
472 #endif /* SUPPORT_PCRE32 */
475 /* ----- More than one mode is supported; a runtime test is needed, except for
476 pcre_config(), and the JIT stack functions, when it doesn't matter which
477 available version is called. ----- */
/* Run-time operating modes, one per code-unit width. The enumerators take the
default values 0, 1 and 2 in this order; CHAR_SIZE (1 << pcre_mode) depends on
that ordering. */

enum {
  PCRE8_MODE,
  PCRE16_MODE,
  PCRE32_MODE
};
485 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486 defined (SUPPORT_PCRE32)) >= 2
/* Evaluates to 1, 2 or 4 when pcre_mode is PCRE8_MODE, PCRE16_MODE or
PCRE32_MODE respectively (enumerator values 0, 1, 2), i.e. the width in bytes
of an 8-, 16- or 32-bit code unit. Assumes pcre_mode always holds one of those
three enumerators. */
488 #define CHAR_SIZE (1 << pcre_mode)
490 /* There doesn't seem to be an easy way of writing these macros that can cope
491 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492 cases separately. */
494 /* ----- All three modes supported ----- */
496 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
498 #define PCHARS(lv, p, offset, len, f) \
499 if (pcre_mode == PCRE32_MODE) \
500 PCHARS32(lv, p, offset, len, f); \
501 else if (pcre_mode == PCRE16_MODE) \
502 PCHARS16(lv, p, offset, len, f); \
503 else \
504 PCHARS8(lv, p, offset, len, f)
506 #define PCHARSV(p, offset, len, f) \
507 if (pcre_mode == PCRE32_MODE) \
508 PCHARSV32(p, offset, len, f); \
509 else if (pcre_mode == PCRE16_MODE) \
510 PCHARSV16(p, offset, len, f); \
511 else \
512 PCHARSV8(p, offset, len, f)
514 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
515 if (pcre_mode == PCRE32_MODE) \
516 READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
517 else if (pcre_mode == PCRE16_MODE) \
518 READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
519 else \
520 READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
522 #define SET_PCRE_CALLOUT(callout) \
523 if (pcre_mode == PCRE32_MODE) \
524 SET_PCRE_CALLOUT32(callout); \
525 else if (pcre_mode == PCRE16_MODE) \
526 SET_PCRE_CALLOUT16(callout); \
527 else \
528 SET_PCRE_CALLOUT8(callout)
530 #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
532 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
533 if (pcre_mode == PCRE32_MODE) \
534 PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
535 else if (pcre_mode == PCRE16_MODE) \
536 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
537 else \
538 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
540 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
541 if (pcre_mode == PCRE32_MODE) \
542 PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
543 else if (pcre_mode == PCRE16_MODE) \
544 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
545 else \
546 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
548 #define PCRE_CONFIG pcre_config
550 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
551 namesptr, cbuffer, size) \
552 if (pcre_mode == PCRE32_MODE) \
553 PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
554 namesptr, cbuffer, size); \
555 else if (pcre_mode == PCRE16_MODE) \
556 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
557 namesptr, cbuffer, size); \
558 else \
559 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
560 namesptr, cbuffer, size)
562 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
563 if (pcre_mode == PCRE32_MODE) \
564 PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
565 else if (pcre_mode == PCRE16_MODE) \
566 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
567 else \
568 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
570 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
571 offsets, size_offsets, workspace, size_workspace) \
572 if (pcre_mode == PCRE32_MODE) \
573 PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
574 offsets, size_offsets, workspace, size_workspace); \
575 else if (pcre_mode == PCRE16_MODE) \
576 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
577 offsets, size_offsets, workspace, size_workspace); \
578 else \
579 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
580 offsets, size_offsets, workspace, size_workspace)
582 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
583 offsets, size_offsets) \
584 if (pcre_mode == PCRE32_MODE) \
585 PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
586 offsets, size_offsets); \
587 else if (pcre_mode == PCRE16_MODE) \
588 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
589 offsets, size_offsets); \
590 else \
591 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
592 offsets, size_offsets)
594 #define PCRE_FREE_STUDY(extra) \
595 if (pcre_mode == PCRE32_MODE) \
596 PCRE_FREE_STUDY32(extra); \
597 else if (pcre_mode == PCRE16_MODE) \
598 PCRE_FREE_STUDY16(extra); \
599 else \
600 PCRE_FREE_STUDY8(extra)
602 #define PCRE_FREE_SUBSTRING(substring) \
603 if (pcre_mode == PCRE32_MODE) \
604 PCRE_FREE_SUBSTRING32(substring); \
605 else if (pcre_mode == PCRE16_MODE) \
606 PCRE_FREE_SUBSTRING16(substring); \
607 else \
608 PCRE_FREE_SUBSTRING8(substring)
610 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
611 if (pcre_mode == PCRE32_MODE) \
612 PCRE_FREE_SUBSTRING_LIST32(listptr); \
613 else if (pcre_mode == PCRE16_MODE) \
614 PCRE_FREE_SUBSTRING_LIST16(listptr); \
615 else \
616 PCRE_FREE_SUBSTRING_LIST8(listptr)
618 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
619 getnamesptr, subsptr) \
620 if (pcre_mode == PCRE32_MODE) \
621 PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
622 getnamesptr, subsptr); \
623 else if (pcre_mode == PCRE16_MODE) \
624 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
625 getnamesptr, subsptr); \
626 else \
627 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
628 getnamesptr, subsptr)
630 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
631 if (pcre_mode == PCRE32_MODE) \
632 PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
633 else if (pcre_mode == PCRE16_MODE) \
634 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
635 else \
636 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
638 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
639 if (pcre_mode == PCRE32_MODE) \
640 PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
641 else if (pcre_mode == PCRE16_MODE) \
642 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
643 else \
644 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
646 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
647 if (pcre_mode == PCRE32_MODE) \
648 PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
649 else if (pcre_mode == PCRE16_MODE) \
650 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
651 else \
652 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
654 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
655 (pcre_mode == PCRE32_MODE ? \
656 PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
657 : pcre_mode == PCRE16_MODE ? \
658 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
659 : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
661 #define PCRE_JIT_STACK_FREE(stack) \
662 if (pcre_mode == PCRE32_MODE) \
663 PCRE_JIT_STACK_FREE32(stack); \
664 else if (pcre_mode == PCRE16_MODE) \
665 PCRE_JIT_STACK_FREE16(stack); \
666 else \
667 PCRE_JIT_STACK_FREE8(stack)
669 #define PCRE_MAKETABLES \
670 (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
672 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
673 if (pcre_mode == PCRE32_MODE) \
674 PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
675 else if (pcre_mode == PCRE16_MODE) \
676 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
677 else \
678 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
680 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
681 if (pcre_mode == PCRE32_MODE) \
682 PCRE_PRINTINT32(re, outfile, debug_lengths); \
683 else if (pcre_mode == PCRE16_MODE) \
684 PCRE_PRINTINT16(re, outfile, debug_lengths); \
685 else \
686 PCRE_PRINTINT8(re, outfile, debug_lengths)
688 #define PCRE_STUDY(extra, re, options, error) \
689 if (pcre_mode == PCRE32_MODE) \
690 PCRE_STUDY32(extra, re, options, error); \
691 else if (pcre_mode == PCRE16_MODE) \
692 PCRE_STUDY16(extra, re, options, error); \
693 else \
694 PCRE_STUDY8(extra, re, options, error)
697 /* ----- Two out of three modes are supported ----- */
699 #else
701 /* We can use some macro trickery to make a single set of definitions work in
702 the three different cases. */
704 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
706 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707 #define BITONE 32
708 #define BITTWO 16
710 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
712 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713 #define BITONE 32
714 #define BITTWO 8
716 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
718 #else
719 #define BITONE 16
720 #define BITTWO 8
721 #endif
/* Token pasting in two steps. Operands of ## are not macro-expanded, so G()
passes its arguments through one extra level of expansion: BITONE and BITTWO
are replaced by their numeric values before glue() joins the tokens. */
723 #define glue(a,b) a##b
724 #define G(a,b) glue(a,b)
727 /* ----- Common macros for two-mode cases ----- */
729 #define PCHARS(lv, p, offset, len, f) \
730 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731 G(PCHARS,BITONE)(lv, p, offset, len, f); \
732 else \
733 G(PCHARS,BITTWO)(lv, p, offset, len, f)
735 #define PCHARSV(p, offset, len, f) \
736 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737 G(PCHARSV,BITONE)(p, offset, len, f); \
738 else \
739 G(PCHARSV,BITTWO)(p, offset, len, f)
741 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744 else \
745 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
747 #define SET_PCRE_CALLOUT(callout) \
748 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749 G(SET_PCRE_CALLOUT,BITONE)(callout); \
750 else \
751 G(SET_PCRE_CALLOUT,BITTWO)(callout)
753 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
756 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759 else \
760 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
762 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765 else \
766 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
768 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
770 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771 namesptr, cbuffer, size) \
772 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774 namesptr, cbuffer, size); \
775 else \
776 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777 namesptr, cbuffer, size)
779 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782 else \
783 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
785 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786 offsets, size_offsets, workspace, size_workspace) \
787 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789 offsets, size_offsets, workspace, size_workspace); \
790 else \
791 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792 offsets, size_offsets, workspace, size_workspace)
794 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
795 offsets, size_offsets) \
796 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
797 G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
798 offsets, size_offsets); \
799 else \
800 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
801 offsets, size_offsets)
803 #define PCRE_FREE_STUDY(extra) \
804 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
805 G(PCRE_FREE_STUDY,BITONE)(extra); \
806 else \
807 G(PCRE_FREE_STUDY,BITTWO)(extra)
809 #define PCRE_FREE_SUBSTRING(substring) \
810 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811 G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
812 else \
813 G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
815 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
816 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817 G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
818 else \
819 G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
/* Two-mode dispatch for the four "get" macros: each expands to an if/else
statement that tests pcre_mode and forwards its arguments unchanged to the
BITONE or BITTWO variant of the same-named macro. */

/* Dispatches to the BITONE or BITTWO variant of PCRE_GET_NAMED_SUBSTRING. */
821 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
822 getnamesptr, subsptr) \
823 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
824 G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
825 getnamesptr, subsptr); \
826 else \
827 G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
828 getnamesptr, subsptr)
/* Dispatches to the BITONE or BITTWO variant of PCRE_GET_STRINGNUMBER. */
830 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
831 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
832 G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
833 else \
834 G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
/* Dispatches to the BITONE or BITTWO variant of PCRE_GET_SUBSTRING. */
836 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
837 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838 G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
839 else \
840 G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
/* Dispatches to the BITONE or BITTWO variant of PCRE_GET_SUBSTRING_LIST. */
842 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
843 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844 G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
845 else \
846 G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
/* Two-mode dispatch for JIT stack allocation. Unlike the statement-style
dispatch macros, this one is an expression: it evaluates to the result of the
BITONE or BITTWO variant of PCRE_JIT_STACK_ALLOC, chosen by pcre_mode. The
whole conditional is wrapped in parentheses so that the macro acts as a single
operand when it appears inside a larger expression (after a cast, or next to
an operator that binds more tightly than ?:). */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
/* Two-mode dispatch: expands to an if/else statement that tests pcre_mode and
passes stack to the BITONE or BITTWO variant of PCRE_JIT_STACK_FREE. */
853 #define PCRE_JIT_STACK_FREE(stack) \
854 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
855 G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
856 else \
857 G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
/* Two-mode dispatch for building character tables. This is an object-like
macro that evaluates to the result of calling the BITONE or BITTWO
"_maketables" function, chosen by pcre_mode. The whole conditional is wrapped
in parentheses so that the macro acts as a single operand when it appears
inside a larger expression. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
/* Two-mode dispatch for the remaining statement-style macros: each expands to
an if/else statement that tests pcre_mode and forwards its arguments unchanged
to the BITONE or BITTWO variant of the same-named macro. */

/* Dispatches to the BITONE or BITTWO variant of
PCRE_PATTERN_TO_HOST_BYTE_ORDER. */
863 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
864 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
865 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
866 else \
867 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
/* Dispatches to the BITONE or BITTWO variant of PCRE_PRINTINT. */
869 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
870 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871 G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
872 else \
873 G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
/* Dispatches to the BITONE or BITTWO variant of PCRE_STUDY. */
875 #define PCRE_STUDY(extra, re, options, error) \
876 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877 G(PCRE_STUDY,BITONE)(extra, re, options, error); \
878 else \
879 G(PCRE_STUDY,BITTWO)(extra, re, options, error)
881 #endif /* Two out of three modes */
883 /* ----- End of cases where more than one mode is supported ----- */
886 /* ----- Only 8-bit mode is supported ----- */
888 #elif defined SUPPORT_PCRE8
/* With a single mode compiled in there is nothing to dispatch on: every
generic name is defined directly as its 8-bit counterpart, and CHAR_SIZE is 1.
PCRE_CONFIG and PCRE_MAKETABLES map straight onto the library functions
pcre_config and pcre_maketables(). */
889 #define CHAR_SIZE 1
890 #define PCHARS PCHARS8
891 #define PCHARSV PCHARSV8
892 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
893 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
894 #define STRLEN STRLEN8
895 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
896 #define PCRE_COMPILE PCRE_COMPILE8
897 #define PCRE_CONFIG pcre_config
898 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
899 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
900 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
901 #define PCRE_EXEC PCRE_EXEC8
902 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
903 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
904 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
905 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
906 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
907 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
908 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
909 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
910 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
911 #define PCRE_MAKETABLES pcre_maketables()
912 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
913 #define PCRE_PRINTINT PCRE_PRINTINT8
914 #define PCRE_STUDY PCRE_STUDY8
916 /* ----- Only 16-bit mode is supported ----- */
918 #elif defined SUPPORT_PCRE16
/* With a single mode compiled in there is nothing to dispatch on: every
generic name is defined directly as its 16-bit counterpart, and CHAR_SIZE is 2.
PCRE_CONFIG and PCRE_MAKETABLES map straight onto the library functions
pcre16_config and pcre16_maketables(). */
919 #define CHAR_SIZE 2
920 #define PCHARS PCHARS16
921 #define PCHARSV PCHARSV16
922 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
923 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
924 #define STRLEN STRLEN16
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
926 #define PCRE_COMPILE PCRE_COMPILE16
927 #define PCRE_CONFIG pcre16_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
931 #define PCRE_EXEC PCRE_EXEC16
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
941 #define PCRE_MAKETABLES pcre16_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
943 #define PCRE_PRINTINT PCRE_PRINTINT16
944 #define PCRE_STUDY PCRE_STUDY16
946 /* ----- Only 32-bit mode is supported ----- */
948 #elif defined SUPPORT_PCRE32
/* With a single mode compiled in there is nothing to dispatch on: every
generic name is defined directly as its 32-bit counterpart, and CHAR_SIZE is 4.
PCRE_CONFIG and PCRE_MAKETABLES map straight onto the library functions
pcre32_config and pcre32_maketables(). */
949 #define CHAR_SIZE 4
950 #define PCHARS PCHARS32
951 #define PCHARSV PCHARSV32
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
954 #define STRLEN STRLEN32
955 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
956 #define PCRE_COMPILE PCRE_COMPILE32
957 #define PCRE_CONFIG pcre32_config
958 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
959 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
960 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
961 #define PCRE_EXEC PCRE_EXEC32
962 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
963 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
964 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
965 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
966 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
967 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
968 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
969 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
970 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
971 #define PCRE_MAKETABLES pcre32_maketables()
972 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
973 #define PCRE_PRINTINT PCRE_PRINTINT32
974 #define PCRE_STUDY PCRE_STUDY32
976 #endif
978 /* ----- End of mode-specific function call macros ----- */
981 /* Other parameters */
/* Provide CLOCKS_PER_SEC when the system headers do not: use CLK_TCK if that
is defined, and otherwise fall back to 100. */
983 #ifndef CLOCKS_PER_SEC
984 #ifdef CLK_TCK
985 #define CLOCKS_PER_SEC CLK_TCK
986 #else
987 #define CLOCKS_PER_SEC 100
988 #endif
989 #endif
/* DFA_WS_DIMENSION is only defined when DFA support has not been disabled with
NODFA. NOTE(review): presumably the number of elements in the DFA workspace
vector - confirm where it is used. */
991 #if !defined NODFA
992 #define DFA_WS_DIMENSION 1000
993 #endif
995 /* This is the default loop count for timing. */
997 #define LOOPREPEAT 500000
999 /* Static variables */
/* File-scope state for the test program. Every object here has static storage
duration, so those without an explicit initializer start out as zero (or
NULL). */
1001 static FILE *outfile;
1002 static int log_store = 0;
1003 static int callout_count;
1004 static int callout_extra;
1005 static int callout_fail_count;
1006 static int callout_fail_id;
1007 static int debug_lengths;
1008 static int first_callout;
1009 static int jit_was_used; /* set to TRUE by jit_callback() */
1010 static int locale_set = 0;
1011 static int show_malloc;
1012 static int use_utf;
1013 static size_t gotten_store;
1014 static size_t first_gotten_store = 0;
1015 static const unsigned char *last_callout_mark = NULL;
1017 /* The buffers grow automatically if very long input lines are encountered. */
/* buffer_size is the current size of buffer; extend_inputline() subtracts the
read position from it to find how much room is left. Both buffers start as
NULL here. */
1019 static int buffer_size = 50000;
1020 static pcre_uint8 *buffer = NULL;
1021 static pcre_uint8 *pbuffer = NULL;
1023 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1025 #ifdef COMPILE_PCRE16
1026 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1027 #endif
1029 #ifdef COMPILE_PCRE32
1030 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1031 #endif
1033 /* We need buffers for building 16/32-bit strings, and the tables of operator
1034 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035 pattern for saving/reloading testing. Luckily, the data for these tables is
1036 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038 LINK_SIZE is also used later in this program. */
1040 #ifdef SUPPORT_PCRE16
/* Redefine IMM2_SIZE and LINK_SIZE before OP_LENGTHS is expanded below, so
that the table is built with the 16-bit values. */
1041 #undef IMM2_SIZE
1042 #define IMM2_SIZE 1
/* A LINK_SIZE of 2 becomes 1, and 3 or 4 becomes 2; anything else is rejected
at compile time. NOTE(review): this looks like a conversion from a count of
bytes to a count of 16-bit units - confirm. */
1044 #if LINK_SIZE == 2
1045 #undef LINK_SIZE
1046 #define LINK_SIZE 1
1047 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1048 #undef LINK_SIZE
1049 #define LINK_SIZE 2
1050 #else
1051 #error LINK_SIZE must be either 2, 3, or 4
1052 #endif
/* buffer16 starts empty (NULL, size 0); to16() allocates and enlarges it on
demand and records its size in bytes in buffer16_size. */
1054 static int buffer16_size = 0;
1055 static pcre_uint16 *buffer16 = NULL;
1056 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1057 #endif /* SUPPORT_PCRE16 */
1059 #ifdef SUPPORT_PCRE32
/* For 32-bit mode IMM2_SIZE and LINK_SIZE are both forced to 1 before
OP_LENGTHS is expanded. NOTE(review): when SUPPORT_PCRE16 is also defined,
these definitions replace the ones made in the 16-bit section above, so code
after this point sees LINK_SIZE as 1. */
1060 #undef IMM2_SIZE
1061 #define IMM2_SIZE 1
1062 #undef LINK_SIZE
1063 #define LINK_SIZE 1
/* buffer32 starts empty (NULL, size 0); to32() allocates and enlarges it on
demand and records its size in bytes in buffer32_size. */
1065 static int buffer32_size = 0;
1066 static pcre_uint32 *buffer32 = NULL;
1067 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1068 #endif /* SUPPORT_PCRE32 */
1070 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1071 support, it can be changed by an option. If there is no 8-bit support, there
1072 must be 16- or 32-bit support, so default to 16-bit if present, else 32-bit. */
/* pcre_mode is the value the two-mode dispatch macros compare against
PCREn_MODE. Its initial value is the first supported mode in the order 8-bit,
16-bit, 32-bit. */
1074 #if defined SUPPORT_PCRE8
1075 static int pcre_mode = PCRE8_MODE;
1076 #elif defined SUPPORT_PCRE16
1077 static int pcre_mode = PCRE16_MODE;
1078 #elif defined SUPPORT_PCRE32
1079 static int pcre_mode = PCRE32_MODE;
1080 #endif
1082 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
/* Seven entries, one for each non-empty combination of the three JIT study
options. Entry k (counting from 0) holds the options selected by the binary
digits of k+1: bit 0 is PCRE_STUDY_JIT_COMPILE, bit 1 is
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE and bit 2 is
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE. NOTE(review): presumably indexed by n - 1
for the option digit n - confirm against the option-parsing code. */
1084 static int jit_study_bits[] =
1086 PCRE_STUDY_JIT_COMPILE, /* entry 0 */
1087 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* entry 1 */
1088 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, /* entry 2 */
1089 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* entry 3 */
1090 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* entry 4 */
1091 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, /* entry 5 */
1092 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1093 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE /* entry 6 */
/* The bitwise OR of all three JIT study options. */
1096 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1097 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1099 /* Textual explanations for runtime error codes */
/* Thirty-three entries (indices 0 to 32). A NULL entry marks a code that has
no fixed text here, for the reason given in the comment beside it.
NOTE(review): the notes on entries 0 and 1 suggest the table is indexed by the
negated PCRE error code - confirm at the lookup site. */
1101 static const char *errtexts[] = {
1102 NULL, /* 0 is no error */
1103 NULL, /* NOMATCH is handled specially */
1104 "NULL argument passed",
1105 "bad option value",
1106 "magic number missing",
1107 "unknown opcode - pattern overwritten?",
1108 "no more memory",
1109 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1110 "match limit exceeded",
1111 "callout error code",
1112 NULL, /* BADUTF8/16 is handled specially */
1113 NULL, /* BADUTF8/16 offset is handled specially */
1114 NULL, /* PARTIAL is handled specially */
1115 "not used - internal error",
1116 "internal error - pattern overwritten?",
1117 "bad count value",
1118 "item unsupported for DFA matching",
1119 "backreference condition or recursion test not supported for DFA matching",
1120 "match limit not supported for DFA matching",
1121 "workspace size exceeded in DFA matching",
1122 "too much recursion for DFA matching",
1123 "recursion limit exceeded",
1124 "not used - internal error",
1125 "invalid combination of newline options",
1126 "bad offset value",
1127 NULL, /* SHORTUTF8/16 is handled specially */
1128 "nested recursion at the same subject position",
1129 "JIT stack limit reached",
1130 "pattern compiled in wrong mode: 8-bit/16-bit error",
1131 "pattern compiled with other endianness",
1132 "invalid data in workspace for DFA restart",
1133 "bad JIT option",
1134 "bad length"
1138 /*************************************************
1139 * Alternate character tables *
1140 *************************************************/
1142 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1143 using the default tables of the library. However, the T option can be used to
1144 select alternate sets of tables, for different kinds of testing. Note also that
1145 the L (locale) option also adjusts the tables. */
1147 /* This is the set of tables distributed as default with PCRE. It recognizes
1148 only ASCII characters. */
/* Layout of tables0: a 256-byte lower-casing table, a 256-byte case-flipping
table, ten 32-byte character-class bit maps (320 bytes), and a 256-byte
character-type table. */
1150 static const pcre_uint8 tables0[] = {
1152 /* This table is a lower casing table. */
1154 0, 1, 2, 3, 4, 5, 6, 7,
1155 8, 9, 10, 11, 12, 13, 14, 15,
1156 16, 17, 18, 19, 20, 21, 22, 23,
1157 24, 25, 26, 27, 28, 29, 30, 31,
1158 32, 33, 34, 35, 36, 37, 38, 39,
1159 40, 41, 42, 43, 44, 45, 46, 47,
1160 48, 49, 50, 51, 52, 53, 54, 55,
1161 56, 57, 58, 59, 60, 61, 62, 63,
1162 64, 97, 98, 99,100,101,102,103,
1163 104,105,106,107,108,109,110,111,
1164 112,113,114,115,116,117,118,119,
1165 120,121,122, 91, 92, 93, 94, 95,
1166 96, 97, 98, 99,100,101,102,103,
1167 104,105,106,107,108,109,110,111,
1168 112,113,114,115,116,117,118,119,
1169 120,121,122,123,124,125,126,127,
1170 128,129,130,131,132,133,134,135,
1171 136,137,138,139,140,141,142,143,
1172 144,145,146,147,148,149,150,151,
1173 152,153,154,155,156,157,158,159,
1174 160,161,162,163,164,165,166,167,
1175 168,169,170,171,172,173,174,175,
1176 176,177,178,179,180,181,182,183,
1177 184,185,186,187,188,189,190,191,
1178 192,193,194,195,196,197,198,199,
1179 200,201,202,203,204,205,206,207,
1180 208,209,210,211,212,213,214,215,
1181 216,217,218,219,220,221,222,223,
1182 224,225,226,227,228,229,230,231,
1183 232,233,234,235,236,237,238,239,
1184 240,241,242,243,244,245,246,247,
1185 248,249,250,251,252,253,254,255,
1187 /* This table is a case flipping table. */
1189 0, 1, 2, 3, 4, 5, 6, 7,
1190 8, 9, 10, 11, 12, 13, 14, 15,
1191 16, 17, 18, 19, 20, 21, 22, 23,
1192 24, 25, 26, 27, 28, 29, 30, 31,
1193 32, 33, 34, 35, 36, 37, 38, 39,
1194 40, 41, 42, 43, 44, 45, 46, 47,
1195 48, 49, 50, 51, 52, 53, 54, 55,
1196 56, 57, 58, 59, 60, 61, 62, 63,
1197 64, 97, 98, 99,100,101,102,103,
1198 104,105,106,107,108,109,110,111,
1199 112,113,114,115,116,117,118,119,
1200 120,121,122, 91, 92, 93, 94, 95,
1201 96, 65, 66, 67, 68, 69, 70, 71,
1202 72, 73, 74, 75, 76, 77, 78, 79,
1203 80, 81, 82, 83, 84, 85, 86, 87,
1204 88, 89, 90,123,124,125,126,127,
1205 128,129,130,131,132,133,134,135,
1206 136,137,138,139,140,141,142,143,
1207 144,145,146,147,148,149,150,151,
1208 152,153,154,155,156,157,158,159,
1209 160,161,162,163,164,165,166,167,
1210 168,169,170,171,172,173,174,175,
1211 176,177,178,179,180,181,182,183,
1212 184,185,186,187,188,189,190,191,
1213 192,193,194,195,196,197,198,199,
1214 200,201,202,203,204,205,206,207,
1215 208,209,210,211,212,213,214,215,
1216 216,217,218,219,220,221,222,223,
1217 224,225,226,227,228,229,230,231,
1218 232,233,234,235,236,237,238,239,
1219 240,241,242,243,244,245,246,247,
1220 248,249,250,251,252,253,254,255,
1222 /* This table contains bit maps for various character classes. Each map is 32
1223 bytes long and the bits run from the least significant end of each byte. The
1224 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1225 graph, print, punct, and cntrl. Other classes are built from combinations. */
/* space */
1227 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1228 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1229 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1230 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* xdigit */
1232 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1233 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1234 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1235 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* digit */
1237 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1238 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1239 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1240 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* upper */
1242 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1243 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1244 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1245 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* lower */
1247 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1248 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1249 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1250 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* word */
1252 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1253 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1254 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1255 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* graph */
1257 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1258 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* print */
1262 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1263 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1264 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* punct */
1267 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1268 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* cntrl */
1272 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1274 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277 /* This table identifies various classes of character by individual bits:
1278 0x01 white space character
1279 0x02 letter
1280 0x04 decimal digit
1281 0x08 hexadecimal digit
1282 0x10 alphanumeric or '_'
1283 0x80 regular expression metacharacter or binary zero
1286 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1287 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
1288 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1289 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1290 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1291 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1292 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1293 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1294 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1295 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1296 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1297 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1298 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1299 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1300 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1301 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1302 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1303 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1308 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1309 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1310 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1311 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1312 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1313 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1314 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1315 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1316 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1317 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1319 /* This is a set of tables that came originally from a Windows user. It seems to
1320 be at least an approximation of ISO 8859. In particular, there are characters
1321 greater than 128 that are marked as spaces, letters, etc. */
/* tables1 has four sections, marked below: 256 bytes of lower-casing data,
256 bytes of case-flipping data, 320 bytes of class bit maps, and 256 bytes of
character-type flags. */
1323 static const pcre_uint8 tables1[] = {
/* Lower-casing table (256 bytes): 65-90 map to 97-122, and 192-214 and
216-222 map to 224-246 and 248-254. */
1324 0,1,2,3,4,5,6,7,
1325 8,9,10,11,12,13,14,15,
1326 16,17,18,19,20,21,22,23,
1327 24,25,26,27,28,29,30,31,
1328 32,33,34,35,36,37,38,39,
1329 40,41,42,43,44,45,46,47,
1330 48,49,50,51,52,53,54,55,
1331 56,57,58,59,60,61,62,63,
1332 64,97,98,99,100,101,102,103,
1333 104,105,106,107,108,109,110,111,
1334 112,113,114,115,116,117,118,119,
1335 120,121,122,91,92,93,94,95,
1336 96,97,98,99,100,101,102,103,
1337 104,105,106,107,108,109,110,111,
1338 112,113,114,115,116,117,118,119,
1339 120,121,122,123,124,125,126,127,
1340 128,129,130,131,132,133,134,135,
1341 136,137,138,139,140,141,142,143,
1342 144,145,146,147,148,149,150,151,
1343 152,153,154,155,156,157,158,159,
1344 160,161,162,163,164,165,166,167,
1345 168,169,170,171,172,173,174,175,
1346 176,177,178,179,180,181,182,183,
1347 184,185,186,187,188,189,190,191,
1348 224,225,226,227,228,229,230,231,
1349 232,233,234,235,236,237,238,239,
1350 240,241,242,243,244,245,246,215,
1351 248,249,250,251,252,253,254,223,
1352 224,225,226,227,228,229,230,231,
1353 232,233,234,235,236,237,238,239,
1354 240,241,242,243,244,245,246,247,
1355 248,249,250,251,252,253,254,255,
/* Case-flipping table (256 bytes). */
1356 0,1,2,3,4,5,6,7,
1357 8,9,10,11,12,13,14,15,
1358 16,17,18,19,20,21,22,23,
1359 24,25,26,27,28,29,30,31,
1360 32,33,34,35,36,37,38,39,
1361 40,41,42,43,44,45,46,47,
1362 48,49,50,51,52,53,54,55,
1363 56,57,58,59,60,61,62,63,
1364 64,97,98,99,100,101,102,103,
1365 104,105,106,107,108,109,110,111,
1366 112,113,114,115,116,117,118,119,
1367 120,121,122,91,92,93,94,95,
1368 96,65,66,67,68,69,70,71,
1369 72,73,74,75,76,77,78,79,
1370 80,81,82,83,84,85,86,87,
1371 88,89,90,123,124,125,126,127,
1372 128,129,130,131,132,133,134,135,
1373 136,137,138,139,140,141,142,143,
1374 144,145,146,147,148,149,150,151,
1375 152,153,154,155,156,157,158,159,
1376 160,161,162,163,164,165,166,167,
1377 168,169,170,171,172,173,174,175,
1378 176,177,178,179,180,181,182,183,
1379 184,185,186,187,188,189,190,191,
1380 224,225,226,227,228,229,230,231,
1381 232,233,234,235,236,237,238,239,
1382 240,241,242,243,244,245,246,215,
1383 248,249,250,251,252,253,254,223,
1384 192,193,194,195,196,197,198,199,
1385 200,201,202,203,204,205,206,207,
1386 208,209,210,211,212,213,214,247,
1387 216,217,218,219,220,221,222,255,
/* Character-class bit maps (320 bytes, i.e. ten maps of 32 bytes each).
NOTE(review): presumably in the same class order as the maps in tables0 -
confirm. */
1388 0,62,0,0,1,0,0,0,
1389 0,0,0,0,0,0,0,0,
1390 32,0,0,0,1,0,0,0,
1391 0,0,0,0,0,0,0,0,
1392 0,0,0,0,0,0,255,3,
1393 126,0,0,0,126,0,0,0,
1394 0,0,0,0,0,0,0,0,
1395 0,0,0,0,0,0,0,0,
1396 0,0,0,0,0,0,255,3,
1397 0,0,0,0,0,0,0,0,
1398 0,0,0,0,0,0,12,2,
1399 0,0,0,0,0,0,0,0,
1400 0,0,0,0,0,0,0,0,
1401 254,255,255,7,0,0,0,0,
1402 0,0,0,0,0,0,0,0,
1403 255,255,127,127,0,0,0,0,
1404 0,0,0,0,0,0,0,0,
1405 0,0,0,0,254,255,255,7,
1406 0,0,0,0,0,4,32,4,
1407 0,0,0,128,255,255,127,255,
1408 0,0,0,0,0,0,255,3,
1409 254,255,255,135,254,255,255,7,
1410 0,0,0,0,0,4,44,6,
1411 255,255,127,255,255,255,127,255,
1412 0,0,0,0,254,255,255,255,
1413 255,255,255,255,255,255,255,127,
1414 0,0,0,0,254,255,255,255,
1415 255,255,255,255,255,255,255,255,
1416 0,2,0,0,255,255,255,255,
1417 255,255,255,255,255,255,255,127,
1418 0,0,0,0,255,255,255,255,
1419 255,255,255,255,255,255,255,255,
1420 0,0,0,0,254,255,0,252,
1421 1,0,0,248,1,0,0,120,
1422 0,0,0,0,254,255,255,255,
1423 0,0,128,0,0,0,128,0,
1424 255,255,255,255,0,0,0,0,
1425 0,0,0,0,0,0,0,128,
1426 255,255,255,255,0,0,0,0,
1427 0,0,0,0,0,0,0,0,
/* Character-type flags (256 bytes), one byte per character code.
NOTE(review): presumably using the same flag bits as the character-type table
in tables0 - confirm. */
1428 128,0,0,0,0,0,0,0,
1429 0,1,1,0,1,1,0,0,
1430 0,0,0,0,0,0,0,0,
1431 0,0,0,0,0,0,0,0,
1432 1,0,0,0,128,0,0,0,
1433 128,128,128,128,0,0,128,0,
1434 28,28,28,28,28,28,28,28,
1435 28,28,0,0,0,0,0,128,
1436 0,26,26,26,26,26,26,18,
1437 18,18,18,18,18,18,18,18,
1438 18,18,18,18,18,18,18,18,
1439 18,18,18,128,128,0,128,16,
1440 0,26,26,26,26,26,26,18,
1441 18,18,18,18,18,18,18,18,
1442 18,18,18,18,18,18,18,18,
1443 18,18,18,128,128,0,0,0,
1444 0,0,0,0,0,1,0,0,
1445 0,0,0,0,0,0,0,0,
1446 0,0,0,0,0,0,0,0,
1447 0,0,0,0,0,0,0,0,
1448 1,0,0,0,0,0,0,0,
1449 0,0,18,0,0,0,0,0,
1450 0,0,20,20,0,18,0,0,
1451 0,20,18,0,0,0,0,0,
1452 18,18,18,18,18,18,18,18,
1453 18,18,18,18,18,18,18,18,
1454 18,18,18,18,18,18,18,0,
1455 18,18,18,18,18,18,18,18,
1456 18,18,18,18,18,18,18,18,
1457 18,18,18,18,18,18,18,18,
1458 18,18,18,18,18,18,18,0,
1459 18,18,18,18,18,18,18,18
1465 #ifndef HAVE_STRERROR
1466 /*************************************************
1467 * Provide strerror() for non-ANSI libraries *
1468 *************************************************/
1470 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1471 in their libraries, but can provide the same facility by this simple
1472 alternative function. */
/* Fallback strerror(): looks the message up in the traditional sys_errlist[]
table, which holds sys_nerr entries. A number outside that table yields a fixed
placeholder string instead.

Argument:   n    the error number
Returns:    the corresponding message text */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1483 #endif /* HAVE_STRERROR */
1487 /*************************************************
1488 * Print newline configuration *
1489 *************************************************/
1492 Arguments:
1493 rc the return code from PCRE_CONFIG_NEWLINE
1494 isc TRUE if called from "-C newline"
1495 Returns: nothing
1498 static void
1499 print_newline_config(int rc, BOOL isc)
1501 const char *s = NULL;
1502 if (!isc) printf(" Newline sequence is ");
1503 switch(rc)
1505 case CHAR_CR: s = "CR"; break;
1506 case CHAR_LF: s = "LF"; break;
1507 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1508 case -1: s = "ANY"; break;
1509 case -2: s = "ANYCRLF"; break;
1511 default:
1512 printf("a non-standard value: 0x%04x\n", rc);
1513 return;
1516 printf("%s\n", s);
1521 /*************************************************
1522 * JIT memory callback *
1523 *************************************************/
1525 static pcre_jit_stack* jit_callback(void *arg)
1527 jit_was_used = TRUE;
1528 return (pcre_jit_stack *)arg;
1532 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1533 /*************************************************
1534 * Convert UTF-8 string to value *
1535 *************************************************/
1537 /* This function takes one or more bytes that represents a UTF-8 character,
1538 and returns the value of the character.
1540 Argument:
1541 utf8bytes a pointer to the byte vector
1542 vptr a pointer to an int to receive the value
1544 Returns: > 0 => the number of bytes consumed
1545 -6 to 0 => malformed UTF-8 character at offset = (-return)
1548 static int
1549 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1551 pcre_uint32 c = *utf8bytes++;
1552 pcre_uint32 d = c;
1553 int i, j, s;
1555 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1557 if ((d & 0x80) == 0) break;
1558 d <<= 1;
1561 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1562 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1564 /* i now has a value in the range 1-5 */
1566 s = 6*i;
1567 d = (c & utf8_table3[i]) << s;
1569 for (j = 0; j < i; j++)
1571 c = *utf8bytes++;
1572 if ((c & 0xc0) != 0x80) return -(j+1);
1573 s -= 6;
1574 d |= (c & 0x3f) << s;
1577 /* Check that encoding was the correct unique one */
1579 for (j = 0; j < utf8_table1_size; j++)
1580 if (d <= (pcre_uint32)utf8_table1[j]) break;
1581 if (j != i) return -(i+1);
1583 /* Valid value */
1585 *vptr = d;
1586 return i+1;
1588 #endif /* NOUTF || SUPPORT_PCRE16 */
1592 #if defined SUPPORT_PCRE8 && !defined NOUTF
1593 /*************************************************
1594 * Convert character value to UTF-8 *
1595 *************************************************/
1597 /* This function takes an integer value in the range 0 - 0x7fffffff
1598 and encodes it as a UTF-8 character in 1 to 6 bytes.
1600 Arguments:
1601 cvalue the character value
1602 utf8bytes pointer to buffer for result - at least 6 bytes long
1604 Returns: number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
1607 static int
1608 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1610 register int i, j;
1611 if (cvalue > 0x7fffffffu)
1612 return -1;
1613 for (i = 0; i < utf8_table1_size; i++)
1614 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1615 utf8bytes += i;
1616 for (j = i; j > 0; j--)
1618 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1619 cvalue >>= 6;
1621 *utf8bytes = utf8_table2[i] | cvalue;
1622 return i + 1;
1624 #endif
1627 #ifdef SUPPORT_PCRE16
1628 /*************************************************
1629 * Convert a string to 16-bit *
1630 *************************************************/
1632 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1633 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1634 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1635 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1636 result is always left in buffer16.
1638 Note that this function does not object to surrogate values. This is
1639 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1640 for the purpose of testing that they are correctly faulted.
1642 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1643 in UTF-8 so that values greater than 255 can be handled.
1645 Arguments:
1646 data TRUE if converting a data line; FALSE for a regex
1647 p points to a byte string
1648 utf true if UTF-8 (to be converted to UTF-16)
1649 len number of bytes in the string (excluding trailing zero)
1651 Returns: number of 16-bit data items used (excluding trailing zero)
1652 OR -1 if a UTF-8 string is malformed
1653 OR -2 if a value > 0x10ffff is encountered
1654 OR -3 if a value > 0xffff is encountered when not in UTF mode
1657 static int
1658 to16(int data, pcre_uint8 *p, int utf, int len)
1660 pcre_uint16 *pp;
1662 if (buffer16_size < 2*len + 2)
1664 if (buffer16 != NULL) free(buffer16);
1665 buffer16_size = 2*len + 2;
1666 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1667 if (buffer16 == NULL)
1669 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1670 exit(1);
1674 pp = buffer16;
1676 if (!utf && !data)
1678 while (len-- > 0) *pp++ = *p++;
1681 else
1683 pcre_uint32 c = 0;
1684 while (len > 0)
1686 int chlen = utf82ord(p, &c);
1687 if (chlen <= 0) return -1;
1688 if (c > 0x10ffff) return -2;
1689 p += chlen;
1690 len -= chlen;
1691 if (c < 0x10000) *pp++ = c; else
1693 if (!utf) return -3;
1694 c -= 0x10000;
1695 *pp++ = 0xD800 | (c >> 10);
1696 *pp++ = 0xDC00 | (c & 0x3ff);
1701 *pp = 0;
1702 return pp - buffer16;
1704 #endif
1706 #ifdef SUPPORT_PCRE32
1707 /*************************************************
1708 * Convert a string to 32-bit *
1709 *************************************************/
1711 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1712 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1713 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1714 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1715 result is always left in buffer32.
1717 Note that this function does not object to surrogate values. This is
1718 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1719 for the purpose of testing that they are correctly faulted.
1721 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1722 in UTF-8 so that values greater than 255 can be handled.
1724 Arguments:
1725 data TRUE if converting a data line; FALSE for a regex
1726 p points to a byte string
1727 utf true if UTF-8 (to be converted to UTF-32)
1728 len number of bytes in the string (excluding trailing zero)
1730 Returns: number of 32-bit data items used (excluding trailing zero)
1731 OR -1 if a UTF-8 string is malformed
1732 OR -2 if a value > 0x10ffff is encountered
1733 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1736 static int
1737 to32(int data, pcre_uint8 *p, int utf, int len)
1739 pcre_uint32 *pp;
1741 if (buffer32_size < 4*len + 4)
1743 if (buffer32 != NULL) free(buffer32);
1744 buffer32_size = 4*len + 4;
1745 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1746 if (buffer32 == NULL)
1748 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1749 exit(1);
1753 pp = buffer32;
1755 if (!utf && !data)
1757 while (len-- > 0) *pp++ = *p++;
1760 else
1762 pcre_uint32 c = 0;
1763 while (len > 0)
1765 int chlen = utf82ord(p, &c);
1766 if (chlen <= 0) return -1;
1767 if (utf)
1769 if (c > 0x10ffff) return -2;
1770 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1773 p += chlen;
1774 len -= chlen;
1775 *pp++ = c;
1779 *pp = 0;
1780 return pp - buffer32;
1783 /* Check that a 32-bit character string is valid UTF-32.
1785 Arguments:
1786 string points to the string
1787 length length of string, or -1 if the string is zero-terminated
1789 Returns: TRUE if the string is a valid UTF-32 string
1790 FALSE otherwise
1793 #ifdef NEVER
1795 #ifdef SUPPORT_UTF
1796 static BOOL
1797 valid_utf32(pcre_uint32 *string, int length)
1799 register pcre_uint32 *p;
1800 register pcre_uint32 c;
1802 for (p = string; length-- > 0; p++)
1804 c = *p;
1806 if (c > 0x10ffffu)
1807 return FALSE;
1809 /* A surrogate */
1810 if ((c & 0xfffff800u) == 0xd800u)
1811 return FALSE;
1813 /* Non-character */
1814 if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1815 return FALSE;
1818 return TRUE;
1820 #endif /* SUPPORT_UTF */
1822 #endif /* NEVER */
1825 #endif
1827 /*************************************************
1828 * Read or extend an input line *
1829 *************************************************/
1831 /* Input lines are read into buffer, but both patterns and data lines can be
1832 continued over multiple input lines. In addition, if the buffer fills up, we
1833 want to automatically expand it so as to be able to handle extremely large
1834 lines that are needed for certain stress tests. When the input buffer is
1835 expanded, the other two buffers must also be expanded likewise, and the
1836 contents of pbuffer, which are a copy of the input for callouts, must be
1837 preserved (for when expansion happens for a data line). This is not the most
1838 optimal way of handling this, but hey, this is just a test program!
1840 Arguments:
1841 f the file to read
1842 start where in buffer to start (this *must* be within buffer)
1843 prompt for stdin or readline()
1845 Returns: pointer to the start of new data
1846 could be a copy of start, or could be moved
1847 NULL if no data read and EOF reached
1850 static pcre_uint8 *
1851 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1853 pcre_uint8 *here = start;
1855 for (;;)
1857 size_t rlen = (size_t)(buffer_size - (here - buffer));
1859 if (rlen > 1000)
1861 int dlen;
1863 /* If libreadline or libedit support is required, use readline() to read a
1864 line if the input is a terminal. Note that readline() removes the trailing
1865 newline, so we must put it back again, to be compatible with fgets(). */
1867 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1868 if (isatty(fileno(f)))
1870 size_t len;
1871 char *s = readline(prompt);
1872 if (s == NULL) return (here == start)? NULL : start;
1873 len = strlen(s);
1874 if (len > 0) add_history(s);
1875 if (len > rlen - 1) len = rlen - 1;
1876 memcpy(here, s, len);
1877 here[len] = '\n';
1878 here[len+1] = 0;
1879 free(s);
1881 else
1882 #endif
1884 /* Read the next line by normal means, prompting if the file is stdin. */
1887 if (f == stdin) printf("%s", prompt);
1888 if (fgets((char *)here, rlen, f) == NULL)
1889 return (here == start)? NULL : start;
1892 dlen = (int)strlen((char *)here);
1893 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1894 here += dlen;
1897 else
1899 int new_buffer_size = 2*buffer_size;
1900 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1901 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1903 if (new_buffer == NULL || new_pbuffer == NULL)
1905 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1906 exit(1);
1909 memcpy(new_buffer, buffer, buffer_size);
1910 memcpy(new_pbuffer, pbuffer, buffer_size);
1912 buffer_size = new_buffer_size;
1914 start = new_buffer + (start - buffer);
1915 here = new_buffer + (here - buffer);
1917 free(buffer);
1918 free(pbuffer);
1920 buffer = new_buffer;
1921 pbuffer = new_pbuffer;
1925 return NULL; /* Control never gets here */
1930 /*************************************************
1931 * Read number from string *
1932 *************************************************/
1934 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1935 around with conditional compilation, just do the job by hand. It is only used
1936 for unpicking arguments, so just keep it simple.
1938 Arguments:
1939 str string to be converted
1940 endptr where to put the end pointer
1942 Returns: the converted value, as an int
1945 static int
1946 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1948 int result = 0;
1949 while(*str != 0 && isspace(*str)) str++;
1950 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1951 *endptr = str;
1952 return(result);
1957 /*************************************************
1958 * Print one character *
1959 *************************************************/
1961 /* Print a single character either literally, or as a hex escape. */
1963 static int pchar(pcre_uint32 c, FILE *f)
1965 int n = 0;
1966 if (PRINTOK(c))
1968 if (f != NULL) fprintf(f, "%c", c);
1969 return 1;
1972 if (c < 0x100)
1974 if (use_utf)
1976 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1977 return 6;
1979 else
1981 if (f != NULL) fprintf(f, "\\x%02x", c);
1982 return 4;
1986 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1987 return n >= 0 ? n : 0;
1992 #ifdef SUPPORT_PCRE8
1993 /*************************************************
1994 * Print 8-bit character string *
1995 *************************************************/
1997 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1998 If handed a NULL file, just counts chars without printing. */
2000 static int pchars(pcre_uint8 *p, int length, FILE *f)
2002 pcre_uint32 c = 0;
2003 int yield = 0;
2005 if (length < 0)
2006 length = strlen((char *)p);
2008 while (length-- > 0)
2010 #if !defined NOUTF
2011 if (use_utf)
2013 int rc = utf82ord(p, &c);
2014 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2016 length -= rc - 1;
2017 p += rc;
2018 yield += pchar(c, f);
2019 continue;
2022 #endif
2023 c = *p++;
2024 yield += pchar(c, f);
2027 return yield;
2029 #endif
2033 #ifdef SUPPORT_PCRE16
2034 /*************************************************
2035 * Find length of 0-terminated 16-bit string *
2036 *************************************************/
2038 static int strlen16(PCRE_SPTR16 p)
2040 int len = 0;
2041 while (*p++ != 0) len++;
2042 return len;
2044 #endif /* SUPPORT_PCRE16 */
2048 #ifdef SUPPORT_PCRE32
2049 /*************************************************
2050 * Find length of 0-terminated 32-bit string *
2051 *************************************************/
2053 static int strlen32(PCRE_SPTR32 p)
2055 int len = 0;
2056 while (*p++ != 0) len++;
2057 return len;
2059 #endif /* SUPPORT_PCRE32 */
2063 #ifdef SUPPORT_PCRE16
2064 /*************************************************
2065 * Print 16-bit character string *
2066 *************************************************/
2068 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2069 If handed a NULL file, just counts chars without printing. */
2071 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2073 int yield = 0;
2075 if (length < 0)
2076 length = strlen16(p);
2078 while (length-- > 0)
2080 pcre_uint32 c = *p++ & 0xffff;
2081 #if !defined NOUTF
2082 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2084 int d = *p & 0xffff;
2085 if (d >= 0xDC00 && d < 0xDFFF)
2087 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2088 length--;
2089 p++;
2092 #endif
2093 yield += pchar(c, f);
2096 return yield;
2098 #endif /* SUPPORT_PCRE16 */
2102 #ifdef SUPPORT_PCRE32
2103 /*************************************************
2104 * Print 32-bit character string *
2105 *************************************************/
2107 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2108 If handed a NULL file, just counts chars without printing. */
2110 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2112 int yield = 0;
2114 (void)(utf); /* Avoid compiler warning */
2116 if (length < 0)
2117 length = strlen32(p);
2119 while (length-- > 0)
2121 pcre_uint32 c = *p++;
2122 yield += pchar(c, f);
2125 return yield;
2127 #endif /* SUPPORT_PCRE32 */
2131 #ifdef SUPPORT_PCRE8
2132 /*************************************************
2133 * Read a capture name (8-bit) and check it *
2134 *************************************************/
2136 static pcre_uint8 *
2137 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2139 pcre_uint8 *npp = *pp;
2140 while (isalnum(*p)) *npp++ = *p++;
2141 *npp++ = 0;
2142 *npp = 0;
2143 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2145 fprintf(outfile, "no parentheses with name \"");
2146 PCHARSV(*pp, 0, -1, outfile);
2147 fprintf(outfile, "\"\n");
2150 *pp = npp;
2151 return p;
2153 #endif /* SUPPORT_PCRE8 */
2157 #ifdef SUPPORT_PCRE16
2158 /*************************************************
2159 * Read a capture name (16-bit) and check it *
2160 *************************************************/
2162 /* Note that the text being read is 8-bit. */
2164 static pcre_uint8 *
2165 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2167 pcre_uint16 *npp = *pp;
2168 while (isalnum(*p)) *npp++ = *p++;
2169 *npp++ = 0;
2170 *npp = 0;
2171 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2173 fprintf(outfile, "no parentheses with name \"");
2174 PCHARSV(*pp, 0, -1, outfile);
2175 fprintf(outfile, "\"\n");
2177 *pp = npp;
2178 return p;
2180 #endif /* SUPPORT_PCRE16 */
2184 #ifdef SUPPORT_PCRE32
2185 /*************************************************
2186 * Read a capture name (32-bit) and check it *
2187 *************************************************/
2189 /* Note that the text being read is 8-bit. */
2191 static pcre_uint8 *
2192 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2194 pcre_uint32 *npp = *pp;
2195 while (isalnum(*p)) *npp++ = *p++;
2196 *npp++ = 0;
2197 *npp = 0;
2198 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2200 fprintf(outfile, "no parentheses with name \"");
2201 PCHARSV(*pp, 0, -1, outfile);
2202 fprintf(outfile, "\"\n");
2204 *pp = npp;
2205 return p;
2207 #endif /* SUPPORT_PCRE32 */
2211 /*************************************************
2212 * Callout function *
2213 *************************************************/
2215 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2216 the match. Yield zero unless more callouts than the fail count, or the callout
2217 data is not zero. */
2219 static int callout(pcre_callout_block *cb)
2221 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2222 int i, pre_start, post_start, subject_length;
2224 if (callout_extra)
2226 fprintf(f, "Callout %d: last capture = %d\n",
2227 cb->callout_number, cb->capture_last);
2229 for (i = 0; i < cb->capture_top * 2; i += 2)
2231 if (cb->offset_vector[i] < 0)
2232 fprintf(f, "%2d: <unset>\n", i/2);
2233 else
2235 fprintf(f, "%2d: ", i/2);
2236 PCHARSV(cb->subject, cb->offset_vector[i],
2237 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2238 fprintf(f, "\n");
2243 /* Re-print the subject in canonical form, the first time or if giving full
2244 datails. On subsequent calls in the same match, we use pchars just to find the
2245 printed lengths of the substrings. */
2247 if (f != NULL) fprintf(f, "--->");
2249 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2250 PCHARS(post_start, cb->subject, cb->start_match,
2251 cb->current_position - cb->start_match, f);
2253 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2255 PCHARSV(cb->subject, cb->current_position,
2256 cb->subject_length - cb->current_position, f);
2258 if (f != NULL) fprintf(f, "\n");
2260 /* Always print appropriate indicators, with callout number if not already
2261 shown. For automatic callouts, show the pattern offset. */
2263 if (cb->callout_number == 255)
2265 fprintf(outfile, "%+3d ", cb->pattern_position);
2266 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2268 else
2270 if (callout_extra) fprintf(outfile, " ");
2271 else fprintf(outfile, "%3d ", cb->callout_number);
2274 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2275 fprintf(outfile, "^");
2277 if (post_start > 0)
2279 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2280 fprintf(outfile, "^");
2283 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2284 fprintf(outfile, " ");
2286 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2287 pbuffer + cb->pattern_position);
2289 fprintf(outfile, "\n");
2290 first_callout = 0;
2292 if (cb->mark != last_callout_mark)
2294 if (cb->mark == NULL)
2295 fprintf(outfile, "Latest Mark: <unset>\n");
2296 else
2298 fprintf(outfile, "Latest Mark: ");
2299 PCHARSV(cb->mark, 0, -1, outfile);
2300 putc('\n', outfile);
2302 last_callout_mark = cb->mark;
2305 if (cb->callout_data != NULL)
2307 int callout_data = *((int *)(cb->callout_data));
2308 if (callout_data != 0)
2310 fprintf(outfile, "Callout data = %d\n", callout_data);
2311 return callout_data;
2315 return (cb->callout_number != callout_fail_id)? 0 :
2316 (++callout_count >= callout_fail_count)? 1 : 0;
2320 /*************************************************
2321 * Local malloc functions *
2322 *************************************************/
2324 /* Alternative malloc function, to test functionality and save the size of a
2325 compiled re, which is the first store request that pcre_compile() makes. The
2326 show_malloc variable is set only during matching. */
2328 static void *new_malloc(size_t size)
2330 void *block = malloc(size);
2331 gotten_store = size;
2332 if (first_gotten_store == 0) first_gotten_store = size;
2333 if (show_malloc)
2334 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2335 return block;
2338 static void new_free(void *block)
2340 if (show_malloc)
2341 fprintf(outfile, "free %p\n", block);
2342 free(block);
2345 /* For recursion malloc/free, to test stacking calls */
2347 static void *stack_malloc(size_t size)
2349 void *block = malloc(size);
2350 if (show_malloc)
2351 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2352 return block;
2355 static void stack_free(void *block)
2357 if (show_malloc)
2358 fprintf(outfile, "stack_free %p\n", block);
2359 free(block);
2363 /*************************************************
2364 * Call pcre_fullinfo() *
2365 *************************************************/
2367 /* Get one piece of information from the pcre_fullinfo() function. When only
2368 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2369 value, but the code is defensive.
2371 Arguments:
2372 re compiled regex
2373 study study data
2374 option PCRE_INFO_xxx option
2375 ptr where to put the data
2377 Returns: 0 when OK, < 0 on error
2380 static int
2381 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2383 int rc;
2385 if (pcre_mode == PCRE32_MODE)
2386 #ifdef SUPPORT_PCRE32
2387 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2388 #else
2389 rc = PCRE_ERROR_BADMODE;
2390 #endif
2391 else if (pcre_mode == PCRE16_MODE)
2392 #ifdef SUPPORT_PCRE16
2393 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2394 #else
2395 rc = PCRE_ERROR_BADMODE;
2396 #endif
2397 else
2398 #ifdef SUPPORT_PCRE8
2399 rc = pcre_fullinfo(re, study, option, ptr);
2400 #else
2401 rc = PCRE_ERROR_BADMODE;
2402 #endif
2404 if (rc < 0)
2406 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2407 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2408 if (rc == PCRE_ERROR_BADMODE)
2409 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2410 "%d-bit mode\n", 8 * CHAR_SIZE,
2411 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2414 return rc;
2419 /*************************************************
2420 * Swap byte functions *
2421 *************************************************/
2423 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2424 value, respectively.
2426 Arguments:
2427 value any number
2429 Returns: the byte swapped value
2432 static pcre_uint32
2433 swap_uint32(pcre_uint32 value)
2435 return ((value & 0x000000ff) << 24) |
2436 ((value & 0x0000ff00) << 8) |
2437 ((value & 0x00ff0000) >> 8) |
2438 (value >> 24);
2441 static pcre_uint16
2442 swap_uint16(pcre_uint16 value)
2444 return (value >> 8) | (value << 8);
2449 /*************************************************
2450 * Flip bytes in a compiled pattern *
2451 *************************************************/
2453 /* This function is called if the 'F' option was present on a pattern that is
2454 to be written to a file. We flip the bytes of all the integer fields in the
2455 regex data block and the study block. In 16-bit mode this also flips relevant
2456 bytes in the pattern itself. This is to make it possible to test PCRE's
2457 ability to reload byte-flipped patterns, e.g. those compiled on a different
2458 architecture. */
2460 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2461 static void
2462 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2464 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2465 #ifdef SUPPORT_PCRE16
2466 int op;
2467 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2468 int length = re->name_count * re->name_entry_size;
2469 #ifdef SUPPORT_UTF
2470 BOOL utf = (re->options & PCRE_UTF16) != 0;
2471 BOOL utf16_char = FALSE;
2472 #endif /* SUPPORT_UTF */
2473 #endif /* SUPPORT_PCRE16 */
2475 /* Always flip the bytes in the main data block and study blocks. */
2477 re->magic_number = REVERSED_MAGIC_NUMBER;
2478 re->size = swap_uint32(re->size);
2479 re->options = swap_uint32(re->options);
2480 re->flags = swap_uint16(re->flags);
2481 re->top_bracket = swap_uint16(re->top_bracket);
2482 re->top_backref = swap_uint16(re->top_backref);
2483 re->first_char = swap_uint16(re->first_char);
2484 re->req_char = swap_uint16(re->req_char);
2485 re->name_table_offset = swap_uint16(re->name_table_offset);
2486 re->name_entry_size = swap_uint16(re->name_entry_size);
2487 re->name_count = swap_uint16(re->name_count);
2489 if (extra != NULL)
2491 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2492 rsd->size = swap_uint32(rsd->size);
2493 rsd->flags = swap_uint32(rsd->flags);
2494 rsd->minlength = swap_uint32(rsd->minlength);
2497 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2498 in the name table, if present, and then in the pattern itself. */
2500 #ifdef SUPPORT_PCRE16
2501 if (pcre_mode != PCRE16_MODE) return;
2503 while(TRUE)
2505 /* Swap previous characters. */
2506 while (length-- > 0)
2508 *ptr = swap_uint16(*ptr);
2509 ptr++;
2511 #ifdef SUPPORT_UTF
2512 if (utf16_char)
2514 if ((ptr[-1] & 0xfc00) == 0xd800)
2516 /* We know that there is only one extra character in UTF-16. */
2517 *ptr = swap_uint16(*ptr);
2518 ptr++;
2521 utf16_char = FALSE;
2522 #endif /* SUPPORT_UTF */
2524 /* Get next opcode. */
2526 length = 0;
2527 op = *ptr;
2528 *ptr++ = swap_uint16(op);
2530 switch (op)
2532 case OP_END:
2533 return;
2535 #ifdef SUPPORT_UTF
2536 case OP_CHAR:
2537 case OP_CHARI:
2538 case OP_NOT:
2539 case OP_NOTI:
2540 case OP_STAR:
2541 case OP_MINSTAR:
2542 case OP_PLUS:
2543 case OP_MINPLUS:
2544 case OP_QUERY:
2545 case OP_MINQUERY:
2546 case OP_UPTO:
2547 case OP_MINUPTO:
2548 case OP_EXACT:
2549 case OP_POSSTAR:
2550 case OP_POSPLUS:
2551 case OP_POSQUERY:
2552 case OP_POSUPTO:
2553 case OP_STARI:
2554 case OP_MINSTARI:
2555 case OP_PLUSI:
2556 case OP_MINPLUSI:
2557 case OP_QUERYI:
2558 case OP_MINQUERYI:
2559 case OP_UPTOI:
2560 case OP_MINUPTOI:
2561 case OP_EXACTI:
2562 case OP_POSSTARI:
2563 case OP_POSPLUSI:
2564 case OP_POSQUERYI:
2565 case OP_POSUPTOI:
2566 case OP_NOTSTAR:
2567 case OP_NOTMINSTAR:
2568 case OP_NOTPLUS:
2569 case OP_NOTMINPLUS:
2570 case OP_NOTQUERY:
2571 case OP_NOTMINQUERY:
2572 case OP_NOTUPTO:
2573 case OP_NOTMINUPTO:
2574 case OP_NOTEXACT:
2575 case OP_NOTPOSSTAR:
2576 case OP_NOTPOSPLUS:
2577 case OP_NOTPOSQUERY:
2578 case OP_NOTPOSUPTO:
2579 case OP_NOTSTARI:
2580 case OP_NOTMINSTARI:
2581 case OP_NOTPLUSI:
2582 case OP_NOTMINPLUSI:
2583 case OP_NOTQUERYI:
2584 case OP_NOTMINQUERYI:
2585 case OP_NOTUPTOI:
2586 case OP_NOTMINUPTOI:
2587 case OP_NOTEXACTI:
2588 case OP_NOTPOSSTARI:
2589 case OP_NOTPOSPLUSI:
2590 case OP_NOTPOSQUERYI:
2591 case OP_NOTPOSUPTOI:
2592 if (utf) utf16_char = TRUE;
2593 #endif
2594 /* Fall through. */
2596 default:
2597 length = OP_lengths16[op] - 1;
2598 break;
2600 case OP_CLASS:
2601 case OP_NCLASS:
2602 /* Skip the character bit map. */
2603 ptr += 32/sizeof(pcre_uint16);
2604 length = 0;
2605 break;
2607 case OP_XCLASS:
2608 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2609 if (LINK_SIZE > 1)
2610 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2611 - (1 + LINK_SIZE + 1));
2612 else
2613 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2615 /* Reverse the size of the XCLASS instance. */
2616 *ptr = swap_uint16(*ptr);
2617 ptr++;
2618 if (LINK_SIZE > 1)
2620 *ptr = swap_uint16(*ptr);
2621 ptr++;
2624 op = *ptr;
2625 *ptr = swap_uint16(op);
2626 ptr++;
2627 if ((op & XCL_MAP) != 0)
2629 /* Skip the character bit map. */
2630 ptr += 32/sizeof(pcre_uint16);
2631 length -= 32/sizeof(pcre_uint16);
2633 break;
2636 /* Control should never reach here in 16 bit mode. */
2637 #endif /* SUPPORT_PCRE16 */
2639 #endif /* SUPPORT_PCRE[8|16] */
2643 #if defined SUPPORT_PCRE32
2644 static void
2645 regexflip_32(pcre *ere, pcre_extra *extra)
2647 real_pcre32 *re = (real_pcre32 *)ere;
2648 int op;
2649 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2650 int length = re->name_count * re->name_entry_size;
2652 /* Always flip the bytes in the main data block and study blocks. */
2654 re->magic_number = REVERSED_MAGIC_NUMBER;
2655 re->size = swap_uint32(re->size);
2656 re->options = swap_uint32(re->options);
2657 re->flags = swap_uint16(re->flags);
2658 re->top_bracket = swap_uint16(re->top_bracket);
2659 re->top_backref = swap_uint16(re->top_backref);
2660 re->first_char = swap_uint32(re->first_char);
2661 re->req_char = swap_uint32(re->req_char);
2662 re->name_table_offset = swap_uint16(re->name_table_offset);
2663 re->name_entry_size = swap_uint16(re->name_entry_size);
2664 re->name_count = swap_uint16(re->name_count);
2666 if (extra != NULL)
2668 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2669 rsd->size = swap_uint32(rsd->size);
2670 rsd->flags = swap_uint32(rsd->flags);
2671 rsd->minlength = swap_uint32(rsd->minlength);
2674 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2675 the pattern itself. */
2677 while(TRUE)
2679 /* Swap previous characters. */
2680 while (length-- > 0)
2682 *ptr = swap_uint32(*ptr);
2683 ptr++;
2686 /* Get next opcode. */
2688 length = 0;
2689 op = *ptr;
2690 *ptr++ = swap_uint32(op);
2692 switch (op)
2694 case OP_END:
2695 return;
2697 default:
2698 length = OP_lengths32[op] - 1;
2699 break;
2701 case OP_CLASS:
2702 case OP_NCLASS:
2703 /* Skip the character bit map. */
2704 ptr += 32/sizeof(pcre_uint32);
2705 length = 0;
2706 break;
2708 case OP_XCLASS:
2709 /* LINK_SIZE can only be 1 in 32-bit mode. */
2710 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2712 /* Reverse the size of the XCLASS instance. */
2713 *ptr = swap_uint32(*ptr);
2714 ptr++;
2716 op = *ptr;
2717 *ptr = swap_uint32(op);
2718 ptr++;
2719 if ((op & XCL_MAP) != 0)
2721 /* Skip the character bit map. */
2722 ptr += 32/sizeof(pcre_uint32);
2723 length -= 32/sizeof(pcre_uint32);
2725 break;
2728 /* Control should never reach here in 32 bit mode. */
2731 #endif /* SUPPORT_PCRE32 */
2735 static void
2736 regexflip(pcre *ere, pcre_extra *extra)
2738 #if defined SUPPORT_PCRE32
2739 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2740 regexflip_32(ere, extra);
2741 #endif
2742 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2743 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2744 regexflip8_or_16(ere, extra);
2745 #endif
2750 /*************************************************
2751 * Check match or recursion limit *
2752 *************************************************/
2754 static int
2755 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2756 int start_offset, int options, int *use_offsets, int use_size_offsets,
2757 int flag, unsigned long int *limit, int errnumber, const char *msg)
2759 int count;
2760 int min = 0;
2761 int mid = 64;
2762 int max = -1;
2764 extra->flags |= flag;
2766 for (;;)
2768 *limit = mid;
2770 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2771 use_offsets, use_size_offsets);
2773 if (count == errnumber)
2775 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2776 min = mid;
2777 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2780 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2781 count == PCRE_ERROR_PARTIAL)
2783 if (mid == min + 1)
2785 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2786 break;
2788 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2789 max = mid;
2790 mid = (min + mid)/2;
2792 else break; /* Some other error */
2795 extra->flags &= ~flag;
2796 return count;
2801 /*************************************************
2802 * Case-independent strncmp() function *
2803 *************************************************/
2806 Arguments:
2807 s first string
2808 t second string
2809 n number of characters to compare
2811 Returns: < 0, = 0, or > 0, according to the comparison
2814 static int
2815 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2817 while (n--)
2819 int c = tolower(*s++) - tolower(*t++);
2820 if (c) return c;
2822 return 0;
2827 /*************************************************
2828 * Check newline indicator *
2829 *************************************************/
2831 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2832 a message and return 0 if there is no match.
2834 Arguments:
2835 p points after the leading '<'
2836 f file for error message
2838 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2841 static int
2842 check_newline(pcre_uint8 *p, FILE *f)
2844 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2845 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2846 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2847 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2848 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2849 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2850 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2851 fprintf(f, "Unknown newline type at: <%s\n", p);
2852 return 0;
2857 /*************************************************
2858 * Usage function *
2859 *************************************************/
/* Write the command-line help text to stdout. Lines for optional features
appear only when the corresponding support is compiled in. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
  fputs("Input and output default to stdin and stdout.\n", stdout);
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
  fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE32
  fputs(" -32 use the 32-bit library\n", stdout);
#endif
  fputs(" -b show compiled code\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -C arg show a specific compile-time option\n", stdout);
  fputs(" and exit with its value. The arg can be:\n", stdout);
  fputs(" linksize internal link size [2, 3, 4]\n", stdout);
  fputs(" pcre8 8 bit library support enabled [0, 1]\n", stdout);
  fputs(" pcre16 16 bit library support enabled [0, 1]\n", stdout);
  fputs(" pcre32 32 bit library support enabled [0, 1]\n", stdout);
  fputs(" utf Unicode Transformation Format supported [0, 1]\n", stdout);
  fputs(" ucp Unicode Properties supported [0, 1]\n", stdout);
  fputs(" jit Just-in-time compiler supported [0, 1]\n", stdout);
  fputs(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s force each pattern to be studied at basic level\n", stdout);
  fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
  fputs(" -s++ ditto, verifying when JIT was actually used\n", stdout);
  fputs(" -s+n force each pattern to be studied, using JIT if available,\n", stdout);
  fputs(" where 1 <= n <= 7 selects JIT options\n", stdout);
  fputs(" -s++n ditto, verifying when JIT was actually used\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2918 /*************************************************
2919 * Main Program *
2920 *************************************************/
2922 /* Read lines from named file or stdin and write to named file or stdout; lines
2923 consist of a regular expression, in delimiters and optionally followed by
2924 options, followed by a set of test data, terminated by an empty line. */
2926 int main(int argc, char **argv)
2928 FILE *infile = stdin;
2929 const char *version;
2930 int options = 0;
2931 int study_options = 0;
2932 int default_find_match_limit = FALSE;
2933 int op = 1;
2934 int timeit = 0;
2935 int timeitm = 0;
2936 int showinfo = 0;
2937 int showstore = 0;
2938 int force_study = -1;
2939 int force_study_options = 0;
2940 int quiet = 0;
2941 int size_offsets = 45;
2942 int size_offsets_max;
2943 int *offsets = NULL;
2944 int debug = 0;
2945 int done = 0;
2946 int all_use_dfa = 0;
2947 int verify_jit = 0;
2948 int yield = 0;
2949 int stack_size;
2950 pcre_uint8 *dbuffer = NULL;
2951 size_t dbuffer_size = 1u << 14;
2953 #if !defined NOPOSIX
2954 int posix = 0;
2955 #endif
2956 #if !defined NODFA
2957 int *dfa_workspace = NULL;
2958 #endif
2960 pcre_jit_stack *jit_stack = NULL;
2962 /* These vectors store, end-to-end, a list of zero-terminated captured
2963 substring names, each list itself being terminated by an empty name. Assume
2964 that 1024 is plenty long enough for the few names we'll be testing. It is
2965 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2966 for the actual memory, to ensure alignment. */
2968 pcre_uint32 copynames[1024];
2969 pcre_uint32 getnames[1024];
2971 #ifdef SUPPORT_PCRE32
2972 pcre_uint32 *cn32ptr;
2973 pcre_uint32 *gn32ptr;
2974 #endif
2976 #ifdef SUPPORT_PCRE16
2977 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2978 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2979 pcre_uint16 *cn16ptr;
2980 pcre_uint16 *gn16ptr;
2981 #endif
2983 #ifdef SUPPORT_PCRE8
2984 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2985 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2986 pcre_uint8 *cn8ptr;
2987 pcre_uint8 *gn8ptr;
2988 #endif
2990 /* Get buffers from malloc() so that valgrind will check their misuse when
2991 debugging. They grow automatically when very long lines are read. The 16-
2992 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2994 buffer = (pcre_uint8 *)malloc(buffer_size);
2995 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2997 /* The outfile variable is static so that new_malloc can use it. */
2999 outfile = stdout;
3001 /* The following _setmode() stuff is some Windows magic that tells its runtime
3002 library to translate CRLF into a single LF character. At least, that's what
3003 I've been told: never having used Windows I take this all on trust. Originally
3004 it set 0x8000, but then I was advised that _O_BINARY was better. */
3006 #if defined(_WIN32) || defined(WIN32)
3007 _setmode( _fileno( stdout ), _O_BINARY );
3008 #endif
3010 /* Get the version number: pcre_version() and pcre16_version() give the same
3011 answer (pcre32_version() is assumed to as well). We just need to ensure that we call one that is available. */
3013 #if defined SUPPORT_PCRE8
3014 version = pcre_version();
3015 #elif defined SUPPORT_PCRE16
3016 version = pcre16_version();
3017 #elif defined SUPPORT_PCRE32
3018 version = pcre32_version();
3019 #endif
3021 /* Scan options */
3023 while (argc > 1 && argv[op][0] == '-')
3025 pcre_uint8 *endptr;
3026 char *arg = argv[op];
3028 if (strcmp(arg, "-m") == 0) showstore = 1;
3029 else if (strcmp(arg, "-s") == 0) force_study = 0;
3031 else if (strncmp(arg, "-s+", 3) == 0)
3033 arg += 3;
3034 if (*arg == '+') { arg++; verify_jit = TRUE; }
3035 force_study = 1;
3036 if (*arg == 0)
3037 force_study_options = jit_study_bits[6];
3038 else if (*arg >= '1' && *arg <= '7')
3039 force_study_options = jit_study_bits[*arg - '1'];
3040 else goto BAD_ARG;
3042 else if (strcmp(arg, "-8") == 0)
3044 #ifdef SUPPORT_PCRE8
3045 pcre_mode = PCRE8_MODE;
3046 #else
3047 printf("** This version of PCRE was built without 8-bit support\n");
3048 exit(1);
3049 #endif
3051 else if (strcmp(arg, "-16") == 0)
3053 #ifdef SUPPORT_PCRE16
3054 pcre_mode = PCRE16_MODE;
3055 #else
3056 printf("** This version of PCRE was built without 16-bit support\n");
3057 exit(1);
3058 #endif
3060 else if (strcmp(arg, "-32") == 0)
3062 #ifdef SUPPORT_PCRE32
3063 pcre_mode = PCRE32_MODE;
3064 #else
3065 printf("** This version of PCRE was built without 32-bit support\n");
3066 exit(1);
3067 #endif
3069 else if (strcmp(arg, "-q") == 0) quiet = 1;
3070 else if (strcmp(arg, "-b") == 0) debug = 1;
3071 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3072 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3073 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3074 #if !defined NODFA
3075 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3076 #endif
3077 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3078 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3079 *endptr == 0))
3081 op++;
3082 argc--;
3084 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
3086 int both = arg[2] == 0;
3087 int temp;
3088 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3089 *endptr == 0))
3091 timeitm = temp;
3092 op++;
3093 argc--;
3095 else timeitm = LOOPREPEAT;
3096 if (both) timeit = timeitm;
3098 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3099 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3100 *endptr == 0))
3102 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
3103 printf("PCRE: -S not supported on this OS\n");
3104 exit(1);
3105 #else
3106 int rc;
3107 struct rlimit rlim;
3108 getrlimit(RLIMIT_STACK, &rlim);
3109 rlim.rlim_cur = stack_size * 1024 * 1024;
3110 rc = setrlimit(RLIMIT_STACK, &rlim);
3111 if (rc != 0)
3113 printf("PCRE: setrlimit() failed with error %d\n", rc);
3114 exit(1);
3116 op++;
3117 argc--;
3118 #endif
3120 #if !defined NOPOSIX
3121 else if (strcmp(arg, "-p") == 0) posix = 1;
3122 #endif
3123 else if (strcmp(arg, "-C") == 0)
3125 int rc;
3126 unsigned long int lrc;
3128 if (argc > 2)
3130 if (strcmp(argv[op + 1], "linksize") == 0)
3132 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3133 printf("%d\n", rc);
3134 yield = rc;
3136 else if (strcmp(argv[op + 1], "pcre8") == 0)
3138 #ifdef SUPPORT_PCRE8
3139 printf("1\n");
3140 yield = 1;
3141 #else
3142 printf("0\n");
3143 yield = 0;
3144 #endif
3145 goto EXIT;
3147 else if (strcmp(argv[op + 1], "pcre16") == 0)
3149 #ifdef SUPPORT_PCRE16
3150 printf("1\n");
3151 yield = 1;
3152 #else
3153 printf("0\n");
3154 yield = 0;
3155 #endif
3156 goto EXIT;
3158 else if (strcmp(argv[op + 1], "pcre32") == 0)
3160 #ifdef SUPPORT_PCRE32
3161 printf("1\n");
3162 yield = 1;
3163 #else
3164 printf("0\n");
3165 yield = 0;
3166 #endif
3167 goto EXIT;
3169 if (strcmp(argv[op + 1], "utf") == 0)
3171 #ifdef SUPPORT_PCRE8
3172 if (pcre_mode == PCRE8_MODE)
3173 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3174 #endif
3175 #ifdef SUPPORT_PCRE16
3176 if (pcre_mode == PCRE16_MODE)
3177 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3178 #endif
3179 #ifdef SUPPORT_PCRE32
3180 if (pcre_mode == PCRE32_MODE)
3181 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3182 #endif
3183 printf("%d\n", rc);
3184 yield = rc;
3185 goto EXIT;
3187 else if (strcmp(argv[op + 1], "ucp") == 0)
3189 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3190 printf("%d\n", rc);
3191 yield = rc;
3193 else if (strcmp(argv[op + 1], "jit") == 0)
3195 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3196 printf("%d\n", rc);
3197 yield = rc;
3199 else if (strcmp(argv[op + 1], "newline") == 0)
3201 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3202 print_newline_config(rc, TRUE);
3204 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3206 #ifdef EBCDIC
3207 printf("1\n");
3208 yield = 1;
3209 #else
3210 printf("0\n");
3211 #endif
3213 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3215 #ifdef EBCDIC
3216 printf("0x%02x\n", CHAR_LF);
3217 #else
3218 printf("0\n");
3219 #endif
3221 else
3223 printf("Unknown -C option: %s\n", argv[op + 1]);
3225 goto EXIT;
3228 /* No argument for -C: output all configuration information. */
3230 printf("PCRE version %s\n", version);
3231 printf("Compiled with\n");
3233 #ifdef EBCDIC
3234 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3235 #endif
3237 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3238 are set, either both UTFs are supported or both are not supported. */
3240 #ifdef SUPPORT_PCRE8
3241 printf(" 8-bit support\n");
3242 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3243 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3244 #endif
3245 #ifdef SUPPORT_PCRE16
3246 printf(" 16-bit support\n");
3247 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3248 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3249 #endif
3250 #ifdef SUPPORT_PCRE32
3251 printf(" 32-bit support\n");
3252 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3253 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3254 #endif
3256 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3257 printf(" %sUnicode properties support\n", rc? "" : "No ");
3258 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3259 if (rc)
3261 const char *arch;
3262 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3263 printf(" Just-in-time compiler support: %s\n", arch);
3265 else
3266 printf(" No just-in-time compiler support\n");
3267 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3268 print_newline_config(rc, FALSE);
3269 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3270 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3271 "all Unicode newlines");
3272 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3273 printf(" Internal link size = %d\n", rc);
3274 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3275 printf(" POSIX malloc threshold = %d\n", rc);
3276 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3277 printf(" Default match limit = %ld\n", lrc);
3278 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3279 printf(" Default recursion depth limit = %ld\n", lrc);
3280 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3281 printf(" Match recursion uses %s", rc? "stack" : "heap");
3282 if (showstore)
3284 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3285 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3287 printf("\n");
3288 goto EXIT;
3290 else if (strcmp(arg, "-help") == 0 ||
3291 strcmp(arg, "--help") == 0)
3293 usage();
3294 goto EXIT;
3296 else
3298 BAD_ARG:
3299 printf("** Unknown or malformed option %s\n", arg);
3300 usage();
3301 yield = 1;
3302 goto EXIT;
3304 op++;
3305 argc--;
3308 /* Get the store for the offsets vector, and remember what it was */
3310 size_offsets_max = size_offsets;
3311 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3312 if (offsets == NULL)
3314 printf("** Failed to get %d bytes of memory for offsets vector\n",
3315 (int)(size_offsets_max * sizeof(int)));
3316 yield = 1;
3317 goto EXIT;
3320 /* Sort out the input and output files */
3322 if (argc > 1)
3324 infile = fopen(argv[op], INPUT_MODE);
3325 if (infile == NULL)
3327 printf("** Failed to open %s\n", argv[op]);
3328 yield = 1;
3329 goto EXIT;
3333 if (argc > 2)
3335 outfile = fopen(argv[op+1], OUTPUT_MODE);
3336 if (outfile == NULL)
3338 printf("** Failed to open %s\n", argv[op+1]);
3339 yield = 1;
3340 goto EXIT;
3344 /* Set alternative malloc function */
3346 #ifdef SUPPORT_PCRE8
3347 pcre_malloc = new_malloc;
3348 pcre_free = new_free;
3349 pcre_stack_malloc = stack_malloc;
3350 pcre_stack_free = stack_free;
3351 #endif
3353 #ifdef SUPPORT_PCRE16
3354 pcre16_malloc = new_malloc;
3355 pcre16_free = new_free;
3356 pcre16_stack_malloc = stack_malloc;
3357 pcre16_stack_free = stack_free;
3358 #endif
3360 #ifdef SUPPORT_PCRE32
3361 pcre32_malloc = new_malloc;
3362 pcre32_free = new_free;
3363 pcre32_stack_malloc = stack_malloc;
3364 pcre32_stack_free = stack_free;
3365 #endif
3367 /* Heading line unless quiet, then prompt for first regex if stdin */
3369 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3371 /* Main loop */
3373 while (!done)
3375 pcre *re = NULL;
3376 pcre_extra *extra = NULL;
3378 #if !defined NOPOSIX /* There are still compilers that require no indent */
3379 regex_t preg;
3380 int do_posix = 0;
3381 #endif
3383 const char *error;
3384 pcre_uint8 *markptr;
3385 pcre_uint8 *p, *pp, *ppp;
3386 pcre_uint8 *to_file = NULL;
3387 const pcre_uint8 *tables = NULL;
3388 unsigned long int get_options;
3389 unsigned long int true_size, true_study_size = 0;
3390 size_t size, regex_gotten_store;
3391 int do_allcaps = 0;
3392 int do_mark = 0;
3393 int do_study = 0;
3394 int no_force_study = 0;
3395 int do_debug = debug;
3396 int do_G = 0;
3397 int do_g = 0;
3398 int do_showinfo = showinfo;
3399 int do_showrest = 0;
3400 int do_showcaprest = 0;
3401 int do_flip = 0;
3402 int erroroffset, len, delimiter, poffset;
3404 #if !defined NODFA
3405 int dfa_matched = 0;
3406 #endif
3408 use_utf = 0;
3409 debug_lengths = 1;
3411 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3412 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3413 fflush(outfile);
3415 p = buffer;
3416 while (isspace(*p)) p++;
3417 if (*p == 0) continue;
3419 /* See if the pattern is to be loaded pre-compiled from a file. */
3421 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3423 pcre_uint32 magic;
3424 pcre_uint8 sbuf[8];
3425 FILE *f;
3427 p++;
3428 if (*p == '!')
3430 do_debug = TRUE;
3431 do_showinfo = TRUE;
3432 p++;
3435 pp = p + (int)strlen((char *)p);
3436 while (isspace(pp[-1])) pp--;
3437 *pp = 0;
3439 f = fopen((char *)p, "rb");
3440 if (f == NULL)
3442 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3443 continue;
3446 first_gotten_store = 0;
3447 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3449 true_size =
3450 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3451 true_study_size =
3452 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3454 re = (pcre *)new_malloc(true_size);
3455 if (re == NULL)
3457 printf("** Failed to get %d bytes of memory for pcre object\n",
3458 (int)true_size);
3459 yield = 1;
3460 goto EXIT;
3462 regex_gotten_store = first_gotten_store;
3464 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3466 magic = REAL_PCRE_MAGIC(re);
3467 if (magic != MAGIC_NUMBER)
3469 if (swap_uint32(magic) == MAGIC_NUMBER)
3471 do_flip = 1;
3473 else
3475 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3476 new_free(re);
3477 fclose(f);
3478 continue;
3482 /* We hide the byte-invert info for little and big endian tests. */
3483 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3484 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3486 /* Now see if there is any following study data. */
3488 if (true_study_size != 0)
3490 pcre_study_data *psd;
3492 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3493 extra->flags = PCRE_EXTRA_STUDY_DATA;
3495 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3496 extra->study_data = psd;
3498 if (fread(psd, 1, true_study_size, f) != true_study_size)
3500 FAIL_READ:
3501 fprintf(outfile, "Failed to read data from %s\n", p);
3502 if (extra != NULL)
3504 PCRE_FREE_STUDY(extra);
3506 new_free(re);
3507 fclose(f);
3508 continue;
3510 fprintf(outfile, "Study data loaded from %s\n", p);
3511 do_study = 1; /* To get the data output if requested */
3513 else fprintf(outfile, "No study data\n");
3515 /* Flip the necessary bytes. */
3516 if (do_flip)
3518 int rc;
3519 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3520 if (rc == PCRE_ERROR_BADMODE)
3522 pcre_uint16 flags_in_host_byte_order;
3523 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3524 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3525 else
3526 flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3527 /* Simulate the result of the function call below. */
3528 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3529 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3530 PCRE_INFO_OPTIONS);
3531 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3532 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3533 new_free(re);
3534 fclose(f);
3535 continue;
3539 /* Need to know if UTF-8 for printing data strings. */
3541 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3543 new_free(re);
3544 fclose(f);
3545 continue;
3547 use_utf = (get_options & PCRE_UTF8) != 0;
3549 fclose(f);
3550 goto SHOW_INFO;
3553 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3554 the pattern; if it isn't complete, read more. */
3556 delimiter = *p++;
3558 if (isalnum(delimiter) || delimiter == '\\')
3560 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3561 goto SKIP_DATA;
3564 pp = p;
3565 poffset = (int)(p - buffer);
3567 for(;;)
3569 while (*pp != 0)
3571 if (*pp == '\\' && pp[1] != 0) pp++;
3572 else if (*pp == delimiter) break;
3573 pp++;
3575 if (*pp != 0) break;
3576 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3578 fprintf(outfile, "** Unexpected EOF\n");
3579 done = 1;
3580 goto CONTINUE;
3582 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3585 /* The buffer may have moved while being extended; reset the start of data
3586 pointer to the correct relative point in the buffer. */
3588 p = buffer + poffset;
3590 /* If the first character after the delimiter is backslash, make
3591 the pattern end with backslash. This is purely to provide a way
3592 of testing for the error message when a pattern ends with backslash. */
3594 if (pp[1] == '\\') *pp++ = '\\';
3596 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3597 for callouts. */
3599 *pp++ = 0;
3600 strcpy((char *)pbuffer, (char *)p);
3602 /* Look for options after final delimiter */
3604 options = 0;
3605 study_options = force_study_options;
3606 log_store = showstore; /* default from command line */
3608 while (*pp != 0)
3610 switch (*pp++)
3612 case 'f': options |= PCRE_FIRSTLINE; break;
3613 case 'g': do_g = 1; break;
3614 case 'i': options |= PCRE_CASELESS; break;
3615 case 'm': options |= PCRE_MULTILINE; break;
3616 case 's': options |= PCRE_DOTALL; break;
3617 case 'x': options |= PCRE_EXTENDED; break;
3619 case '+':
3620 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3621 break;
3623 case '=': do_allcaps = 1; break;
3624 case 'A': options |= PCRE_ANCHORED; break;
3625 case 'B': do_debug = 1; break;
3626 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3627 case 'D': do_debug = do_showinfo = 1; break;
3628 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3629 case 'F': do_flip = 1; break;
3630 case 'G': do_G = 1; break;
3631 case 'I': do_showinfo = 1; break;
3632 case 'J': options |= PCRE_DUPNAMES; break;
3633 case 'K': do_mark = 1; break;
3634 case 'M': log_store = 1; break;
3635 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3637 #if !defined NOPOSIX
3638 case 'P': do_posix = 1; break;
3639 #endif
3641 case 'S':
3642 do_study = 1;
3643 for (;;)
3645 switch (*pp++)
3647 case 'S':
3648 do_study = 0;
3649 no_force_study = 1;
3650 break;
3652 case '!':
3653 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3654 break;
3656 case '+':
3657 if (*pp == '+')
3659 verify_jit = TRUE;
3660 pp++;
3662 if (*pp >= '1' && *pp <= '7')
3663 study_options |= jit_study_bits[*pp++ - '1'];
3664 else
3665 study_options |= jit_study_bits[6];
3666 break;
3668 case '-':
3669 study_options &= ~PCRE_STUDY_ALLJIT;
3670 break;
3672 default:
3673 pp--;
3674 goto ENDLOOP;
3677 ENDLOOP:
3678 break;
3680 case 'U': options |= PCRE_UNGREEDY; break;
3681 case 'W': options |= PCRE_UCP; break;
3682 case 'X': options |= PCRE_EXTRA; break;
3683 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3684 case 'Z': debug_lengths = 0; break;
3685 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3686 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3688 case 'T':
3689 switch (*pp++)
3691 case '0': tables = tables0; break;
3692 case '1': tables = tables1; break;
3694 case '\r':
3695 case '\n':
3696 case ' ':
3697 case 0:
3698 fprintf(outfile, "** Missing table number after /T\n");
3699 goto SKIP_DATA;
3701 default:
3702 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3703 goto SKIP_DATA;
3705 break;
3707 case 'L':
3708 ppp = pp;
3709 /* The '\r' test here is so that it works on Windows. */
3710 /* The test for 0 (the NUL terminator) is just in case this is an unterminated line. */
3711 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3712 *ppp = 0;
3713 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3715 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3716 goto SKIP_DATA;
3718 locale_set = 1;
3719 tables = PCRE_MAKETABLES;
3720 pp = ppp;
3721 break;
3723 case '>':
3724 to_file = pp;
3725 while (*pp != 0) pp++;
3726 while (isspace(pp[-1])) pp--;
3727 *pp = 0;
3728 break;
3730 case '<':
3732 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
3734 options |= PCRE_JAVASCRIPT_COMPAT;
3735 pp += 3;
3737 else
3739 int x = check_newline(pp, outfile);
3740 if (x == 0) goto SKIP_DATA;
3741 options |= x;
3742 while (*pp++ != '>');
3745 break;
3747 case '\r': /* So that it works in Windows */
3748 case '\n':
3749 case ' ':
3750 break;
3752 default:
3753 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
3754 goto SKIP_DATA;
3758 /* Handle compiling via the POSIX interface, which doesn't support the
3759 timing, showing, or debugging options, nor the ability to pass over
3760 local character tables. Neither does it have 16-bit support. */
3762 #if !defined NOPOSIX
3763 if (posix || do_posix)
3765 int rc;
3766 int cflags = 0;
3768 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3769 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3770 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3771 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3772 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3773 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3774 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3776 first_gotten_store = 0;
3777 rc = regcomp(&preg, (char *)p, cflags);
3779 /* Compilation failed; go back for another re, skipping to blank line
3780 if non-interactive. */
3782 if (rc != 0)
3784 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3785 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3786 goto SKIP_DATA;
3790 /* Handle compiling via the native interface */
3792 else
3793 #endif /* !defined NOPOSIX */
3796 /* In 16- or 32-bit mode, convert the input. */
3798 #ifdef SUPPORT_PCRE16
3799 if (pcre_mode == PCRE16_MODE)
3801 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3803 case -1:
3804 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3805 "converted to UTF-16\n");
3806 goto SKIP_DATA;
3808 case -2:
3809 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3810 "cannot be converted to UTF-16\n");
3811 goto SKIP_DATA;
3813 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3814 fprintf(outfile, "**Failed: character value greater than 0xffff "
3815 "cannot be converted to 16-bit in non-UTF mode\n");
3816 goto SKIP_DATA;
3818 default:
3819 break;
3821 p = (pcre_uint8 *)buffer16;
3823 #endif
3825 #ifdef SUPPORT_PCRE32
3826 if (pcre_mode == PCRE32_MODE)
3828 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3830 case -1:
3831 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3832 "converted to UTF-32\n");
3833 goto SKIP_DATA;
3835 case -2:
3836 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3837 "cannot be converted to UTF-32\n");
3838 goto SKIP_DATA;
3840 case -3:
3841 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3842 goto SKIP_DATA;
3844 default:
3845 break;
3847 p = (pcre_uint8 *)buffer32;
3849 #endif
3851 /* Compile many times when timing */
3853 if (timeit > 0)
3855 register int i;
3856 clock_t time_taken;
3857 clock_t start_time = clock();
3858 for (i = 0; i < timeit; i++)
3860 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3861 if (re != NULL) free(re);
3863 time_taken = clock() - start_time;
3864 fprintf(outfile, "Compile time %.4f milliseconds\n",
3865 (((double)time_taken * 1000.0) / (double)timeit) /
3866 (double)CLOCKS_PER_SEC);
3869 first_gotten_store = 0;
3870 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3872 /* Compilation failed; go back for another re, skipping to blank line
3873 if non-interactive. */
3875 if (re == NULL)
3877 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3878 SKIP_DATA:
3879 if (infile != stdin)
3881 for (;;)
3883 if (extend_inputline(infile, buffer, NULL) == NULL)
3885 done = 1;
3886 goto CONTINUE;
3888 len = (int)strlen((char *)buffer);
3889 while (len > 0 && isspace(buffer[len-1])) len--;
3890 if (len == 0) break;
3892 fprintf(outfile, "\n");
3894 goto CONTINUE;
3897 /* Compilation succeeded. It is now possible to set the UTF-8 option from
3898 within the regex; check for this so that we know how to process the data
3899 lines. */
3901 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3902 goto SKIP_DATA;
3903 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3905 /* Extract the size for possible writing before possibly flipping it,
3906 and remember the store that was got. */
3908 true_size = REAL_PCRE_SIZE(re);
3909 regex_gotten_store = first_gotten_store;
3911 /* Output code size information if requested */
3913 if (log_store)
3915 int name_count, name_entry_size, real_pcre_size;
3917 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3918 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3919 real_pcre_size = 0;
3920 #ifdef SUPPORT_PCRE8
3921 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3922 real_pcre_size = sizeof(real_pcre);
3923 #endif
3924 #ifdef SUPPORT_PCRE16
3925 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3926 real_pcre_size = sizeof(real_pcre16);
3927 #endif
3928 #ifdef SUPPORT_PCRE32
3929 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3930 real_pcre_size = sizeof(real_pcre32);
3931 #endif
3932 fprintf(outfile, "Memory allocation (code space): %d\n",
3933 (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3936 /* If -s or /S was present, study the regex to generate additional info to
3937 help with the matching, unless the pattern has the SS option, which
3938 suppresses the effect of /S (used for a few test patterns where studying is
3939 never sensible). */
3941 if (do_study || (force_study >= 0 && !no_force_study))
3943 if (timeit > 0)
3945 register int i;
3946 clock_t time_taken;
3947 clock_t start_time = clock();
3948 for (i = 0; i < timeit; i++)
3950 PCRE_STUDY(extra, re, study_options, &error);
3952 time_taken = clock() - start_time;
3953 if (extra != NULL)
3955 PCRE_FREE_STUDY(extra);
3957 fprintf(outfile, " Study time %.4f milliseconds\n",
3958 (((double)time_taken * 1000.0) / (double)timeit) /
3959 (double)CLOCKS_PER_SEC);
3961 PCRE_STUDY(extra, re, study_options, &error);
3962 if (error != NULL)
3963 fprintf(outfile, "Failed to study: %s\n", error);
3964 else if (extra != NULL)
3966 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3967 if (log_store)
3969 size_t jitsize;
3970 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3971 jitsize != 0)
3972 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3977 /* If /K was present, we set up for handling MARK data. */
3979 if (do_mark)
3981 if (extra == NULL)
3983 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3984 extra->flags = 0;
3986 extra->mark = &markptr;
3987 extra->flags |= PCRE_EXTRA_MARK;
3990 /* Extract and display information from the compiled data if required. */
3992 SHOW_INFO:
3994 if (do_debug)
3996 fprintf(outfile, "------------------------------------------------------------------\n");
3997 PCRE_PRINTINT(re, outfile, debug_lengths);
4000 /* We already have the options in get_options (see above) */
4002 if (do_showinfo)
4004 unsigned long int all_options;
4005 pcre_uint32 first_char, need_char;
4006 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4007 hascrorlf, maxlookbehind;
4008 int nameentrysize, namecount;
4009 const pcre_uint8 *nametable;
4011 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4012 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4013 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4014 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4015 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4016 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4017 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4018 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4019 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4020 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4021 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4022 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4023 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4024 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4025 != 0)
4026 goto SKIP_DATA;
4028 if (size != regex_gotten_store) fprintf(outfile,
4029 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
4030 (int)size, (int)regex_gotten_store);
4032 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4033 if (backrefmax > 0)
4034 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4036 if (namecount > 0)
4038 fprintf(outfile, "Named capturing subpatterns:\n");
4039 while (namecount-- > 0)
4041 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4042 int length = (int)STRLEN(nametable + imm2_size);
4043 fprintf(outfile, " ");
4044 PCHARSV(nametable, imm2_size, length, outfile);
4045 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4046 #ifdef SUPPORT_PCRE32
4047 if (pcre_mode == PCRE32_MODE)
4048 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4049 #endif
4050 #ifdef SUPPORT_PCRE16
4051 if (pcre_mode == PCRE16_MODE)
4052 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4053 #endif
4054 #ifdef SUPPORT_PCRE8
4055 if (pcre_mode == PCRE8_MODE)
4056 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4057 #endif
4058 nametable += nameentrysize * CHAR_SIZE;
4062 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4063 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4065 all_options = REAL_PCRE_OPTIONS(re);
4066 if (do_flip) all_options = swap_uint32(all_options);
4068 if (get_options == 0) fprintf(outfile, "No options\n");
4069 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4070 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4071 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4072 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4073 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4074 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4075 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4076 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4077 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4078 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4079 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4080 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4081 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4082 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4083 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4084 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4085 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4086 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
4088 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4090 switch (get_options & PCRE_NEWLINE_BITS)
4092 case PCRE_NEWLINE_CR:
4093 fprintf(outfile, "Forced newline sequence: CR\n");
4094 break;
4096 case PCRE_NEWLINE_LF:
4097 fprintf(outfile, "Forced newline sequence: LF\n");
4098 break;
4100 case PCRE_NEWLINE_CRLF:
4101 fprintf(outfile, "Forced newline sequence: CRLF\n");
4102 break;
4104 case PCRE_NEWLINE_ANYCRLF:
4105 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4106 break;
4108 case PCRE_NEWLINE_ANY:
4109 fprintf(outfile, "Forced newline sequence: ANY\n");
4110 break;
4112 default:
4113 break;
4116 if (first_char_set == 2)
4118 fprintf(outfile, "First char at start or follows newline\n");
4120 else if (first_char_set == 1)
4122 const char *caseless =
4123 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4124 "" : " (caseless)";
4126 if (PRINTOK(first_char))
4127 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4128 else
4130 fprintf(outfile, "First char = ");
4131 pchar(first_char, outfile);
4132 fprintf(outfile, "%s\n", caseless);
4135 else
4137 fprintf(outfile, "No first char\n");
4140 if (need_char_set == 0)
4142 fprintf(outfile, "No need char\n");
4144 else
4146 const char *caseless =
4147 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4148 "" : " (caseless)";
4150 if (PRINTOK(need_char))
4151 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4152 else
4154 fprintf(outfile, "Need char = ");
4155 pchar(need_char, outfile);
4156 fprintf(outfile, "%s\n", caseless);
4160 if (maxlookbehind > 0)
4161 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4163 /* Don't output study size; at present it is in any case a fixed
4164 value, but it varies, depending on the computer architecture, and
4165 so messes up the test suite. (And with the /F option, it might be
4166 flipped.) If study was forced by an external -s, don't show this
4167 information unless -i or -d was also present. This means that, except
4168 when auto-callouts are involved, the output from runs with and without
4169 -s should be identical. */
4171 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4173 if (extra == NULL)
4174 fprintf(outfile, "Study returned NULL\n");
4175 else
4177 pcre_uint8 *start_bits = NULL;
4178 int minlength;
4180 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4181 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4183 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4185 if (start_bits == NULL)
4186 fprintf(outfile, "No set of starting bytes\n");
4187 else
4189 int i;
4190 int c = 24;
4191 fprintf(outfile, "Starting byte set: ");
4192 for (i = 0; i < 256; i++)
4194 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4196 if (c > 75)
4198 fprintf(outfile, "\n ");
4199 c = 2;
4201 if (PRINTOK(i) && i != ' ')
4203 fprintf(outfile, "%c ", i);
4204 c += 2;
4206 else
4208 fprintf(outfile, "\\x%02x ", i);
4209 c += 5;
4213 fprintf(outfile, "\n");
4218 /* Show this only if the JIT was set by /S, not by -s. */
4220 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4221 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4223 int jit;
4224 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4226 if (jit)
4227 fprintf(outfile, "JIT study was successful\n");
4228 else
4229 #ifdef SUPPORT_JIT
4230 fprintf(outfile, "JIT study was not successful\n");
4231 #else
4232 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4233 #endif
4239 /* If the '>' option was present, we write out the regex to a file, and
4240 that is all. The first 8 bytes of the file are the regex length and then
4241 the study length, in big-endian order. */
4243 if (to_file != NULL)
4245 FILE *f = fopen((char *)to_file, "wb");
4246 if (f == NULL)
4248 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4250 else
4252 pcre_uint8 sbuf[8];
4254 if (do_flip) regexflip(re, extra);
4255 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4256 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4257 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4258 sbuf[3] = (pcre_uint8)((true_size) & 255);
4259 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4260 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4261 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4262 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4264 if (fwrite(sbuf, 1, 8, f) < 8 ||
4265 fwrite(re, 1, true_size, f) < true_size)
4267 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4269 else
4271 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4273 /* If there is study data, write it. */
4275 if (extra != NULL)
4277 if (fwrite(extra->study_data, 1, true_study_size, f) <
4278 true_study_size)
4280 fprintf(outfile, "Write error on %s: %s\n", to_file,
4281 strerror(errno));
4283 else fprintf(outfile, "Study data written to %s\n", to_file);
4286 fclose(f);
4289 new_free(re);
4290 if (extra != NULL)
4292 PCRE_FREE_STUDY(extra);
4294 if (locale_set)
4296 new_free((void *)tables);
4297 setlocale(LC_CTYPE, "C");
4298 locale_set = 0;
4300 continue; /* With next regex */
4302 } /* End of non-POSIX compile */
4304 /* Read data lines and test them */
4306 for (;;)
4308 #ifdef SUPPORT_PCRE8
4309 pcre_uint8 *q8;
4310 #endif
4311 #ifdef SUPPORT_PCRE16
4312 pcre_uint16 *q16;
4313 #endif
4314 #ifdef SUPPORT_PCRE32
4315 pcre_uint32 *q32;
4316 #endif
4317 pcre_uint8 *bptr;
4318 int *use_offsets = offsets;
4319 int use_size_offsets = size_offsets;
4320 int callout_data = 0;
4321 int callout_data_set = 0;
4322 int count;
4323 pcre_uint32 c;
4324 int copystrings = 0;
4325 int find_match_limit = default_find_match_limit;
4326 int getstrings = 0;
4327 int getlist = 0;
4328 int gmatched = 0;
4329 int start_offset = 0;
4330 int start_offset_sign = 1;
4331 int g_notempty = 0;
4332 int use_dfa = 0;
4334 *copynames = 0;
4335 *getnames = 0;
4337 #ifdef SUPPORT_PCRE32
4338 cn32ptr = copynames;
4339 gn32ptr = getnames;
4340 #endif
4341 #ifdef SUPPORT_PCRE16
4342 cn16ptr = copynames16;
4343 gn16ptr = getnames16;
4344 #endif
4345 #ifdef SUPPORT_PCRE8
4346 cn8ptr = copynames8;
4347 gn8ptr = getnames8;
4348 #endif
4350 SET_PCRE_CALLOUT(callout);
4351 first_callout = 1;
4352 last_callout_mark = NULL;
4353 callout_extra = 0;
4354 callout_count = 0;
4355 callout_fail_count = 999999;
4356 callout_fail_id = -1;
4357 show_malloc = 0;
4358 options = 0;
4360 if (extra != NULL) extra->flags &=
4361 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4363 len = 0;
4364 for (;;)
4366 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4368 if (len > 0) /* Reached EOF without hitting a newline */
4370 fprintf(outfile, "\n");
4371 break;
4373 done = 1;
4374 goto CONTINUE;
4376 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4377 len = (int)strlen((char *)buffer);
4378 if (buffer[len-1] == '\n') break;
4381 while (len > 0 && isspace(buffer[len-1])) len--;
4382 buffer[len] = 0;
4383 if (len == 0) break;
4385 p = buffer;
4386 while (isspace(*p)) p++;
4388 #ifndef NOUTF
4389 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4390 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4391 if (use_utf)
4393 pcre_uint8 *q;
4394 pcre_uint32 cc;
4395 int n = 1;
4397 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4398 if (n <= 0)
4400 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4401 goto NEXT_DATA;
4404 #endif
4406 #ifdef SUPPORT_VALGRIND
4407 /* Mark the dbuffer as addressable but undefined again. */
4408 if (dbuffer != NULL)
4410 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4412 #endif
4414 /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4415 the number of pcre_uchar units that will be needed. */
4416 if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4418 dbuffer_size *= 2;
4419 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4420 if (dbuffer == NULL)
4422 fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size);
4423 exit(1);
4427 #ifdef SUPPORT_PCRE8
4428 q8 = (pcre_uint8 *) dbuffer;
4429 #endif
4430 #ifdef SUPPORT_PCRE16
4431 q16 = (pcre_uint16 *) dbuffer;
4432 #endif
4433 #ifdef SUPPORT_PCRE32
4434 q32 = (pcre_uint32 *) dbuffer;
4435 #endif
4437 while ((c = *p++) != 0)
4439 int i = 0;
4440 int n = 0;
4442 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4443 In non-UTF mode, allow the value of the byte to fall through to later,
4444 where values greater than 127 are turned into UTF-8 when running in
4445 16-bit or 32-bit mode. */
4447 if (c != '\\')
4449 #ifndef NOUTF
4450 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4451 #endif
4454 /* Handle backslash escapes */
4456 else switch ((c = *p++))
4458 case 'a': c = 7; break;
4459 case 'b': c = '\b'; break;
4460 case 'e': c = 27; break;
4461 case 'f': c = '\f'; break;
4462 case 'n': c = '\n'; break;
4463 case 'r': c = '\r'; break;
4464 case 't': c = '\t'; break;
4465 case 'v': c = '\v'; break;
4467 case '0': case '1': case '2': case '3':
4468 case '4': case '5': case '6': case '7':
4469 c -= '0';
4470 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4471 c = c * 8 + *p++ - '0';
4472 break;
4474 case 'x':
4475 if (*p == '{')
4477 pcre_uint8 *pt = p;
4478 c = 0;
4480 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4481 when isxdigit() is a macro that refers to its argument more than
4482 once. This is banned by the C Standard, but apparently happens in at
4483 least one MacOS environment. */
4485 for (pt++; isxdigit(*pt); pt++)
4487 if (++i == 9)
4488 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4489 "using only the first eight.\n");
4490 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4492 if (*pt == '}')
4494 p = pt + 1;
4495 break;
4497 /* Not correct form for \x{...}; fall through */
4500 /* \x without {} always defines just one byte in 8-bit mode. This
4501 allows UTF-8 characters to be constructed byte by byte, and also allows
4502 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4503 Otherwise, pass it down to later code so that it can be turned into
4504 UTF-8 when running in 16/32-bit mode. */
4506 c = 0;
4507 while (i++ < 2 && isxdigit(*p))
4509 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4510 p++;
4512 #if !defined NOUTF && defined SUPPORT_PCRE8
4513 if (use_utf && (pcre_mode == PCRE8_MODE))
4515 *q8++ = c;
4516 continue;
4518 #endif
4519 break;
4521 case 0: /* \ followed by EOF allows for an empty line */
4522 p--;
4523 continue;
4525 case '>':
4526 if (*p == '-')
4528 start_offset_sign = -1;
4529 p++;
4531 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4532 start_offset *= start_offset_sign;
4533 continue;
4535 case 'A': /* Option setting */
4536 options |= PCRE_ANCHORED;
4537 continue;
4539 case 'B':
4540 options |= PCRE_NOTBOL;
4541 continue;
4543 case 'C':
4544 if (isdigit(*p)) /* Set copy string */
4546 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4547 copystrings |= 1 << n;
4549 else if (isalnum(*p))
4551 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4553 else if (*p == '+')
4555 callout_extra = 1;
4556 p++;
4558 else if (*p == '-')
4560 SET_PCRE_CALLOUT(NULL);
4561 p++;
4563 else if (*p == '!')
4565 callout_fail_id = 0;
4566 p++;
4567 while(isdigit(*p))
4568 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4569 callout_fail_count = 0;
4570 if (*p == '!')
4572 p++;
4573 while(isdigit(*p))
4574 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4577 else if (*p == '*')
4579 int sign = 1;
4580 callout_data = 0;
4581 if (*(++p) == '-') { sign = -1; p++; }
4582 while(isdigit(*p))
4583 callout_data = callout_data * 10 + *p++ - '0';
4584 callout_data *= sign;
4585 callout_data_set = 1;
4587 continue;
4589 #if !defined NODFA
4590 case 'D':
4591 #if !defined NOPOSIX
4592 if (posix || do_posix)
4593 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4594 else
4595 #endif
4596 use_dfa = 1;
4597 continue;
4598 #endif
4600 #if !defined NODFA
4601 case 'F':
4602 options |= PCRE_DFA_SHORTEST;
4603 continue;
4604 #endif
4606 case 'G':
4607 if (isdigit(*p))
4609 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4610 getstrings |= 1 << n;
4612 else if (isalnum(*p))
4614 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4616 continue;
4618 case 'J':
4619 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4620 if (extra != NULL
4621 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4622 && extra->executable_jit != NULL)
4624 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4625 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4626 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4628 continue;
4630 case 'L':
4631 getlist = 1;
4632 continue;
4634 case 'M':
4635 find_match_limit = 1;
4636 continue;
4638 case 'N':
4639 if ((options & PCRE_NOTEMPTY) != 0)
4640 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4641 else
4642 options |= PCRE_NOTEMPTY;
4643 continue;
4645 case 'O':
4646 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4647 if (n > size_offsets_max)
4649 size_offsets_max = n;
4650 free(offsets);
4651 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4652 if (offsets == NULL)
4654 printf("** Failed to get %d bytes of memory for offsets vector\n",
4655 (int)(size_offsets_max * sizeof(int)));
4656 yield = 1;
4657 goto EXIT;
4660 use_size_offsets = n;
4661 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4662 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4663 continue;
4665 case 'P':
4666 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4667 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4668 continue;
4670 case 'Q':
4671 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4672 if (extra == NULL)
4674 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4675 extra->flags = 0;
4677 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4678 extra->match_limit_recursion = n;
4679 continue;
4681 case 'q':
4682 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4683 if (extra == NULL)
4685 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4686 extra->flags = 0;
4688 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4689 extra->match_limit = n;
4690 continue;
4692 #if !defined NODFA
4693 case 'R':
4694 options |= PCRE_DFA_RESTART;
4695 continue;
4696 #endif
4698 case 'S':
4699 show_malloc = 1;
4700 continue;
4702 case 'Y':
4703 options |= PCRE_NO_START_OPTIMIZE;
4704 continue;
4706 case 'Z':
4707 options |= PCRE_NOTEOL;
4708 continue;
4710 case '?':
4711 options |= PCRE_NO_UTF8_CHECK;
4712 continue;
4714 case '<':
4716 int x = check_newline(p, outfile);
4717 if (x == 0) goto NEXT_DATA;
4718 options |= x;
4719 while (*p++ != '>');
4721 continue;
4724 /* We now have a character value in c that may be greater than 255.
4725 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4726 than 127 in UTF mode must have come from \x{...} or octal constructs
4727 because values from \x.. get this far only in non-UTF mode. */
4729 #ifdef SUPPORT_PCRE8
4730 if (pcre_mode == PCRE8_MODE)
4732 #ifndef NOUTF
4733 if (use_utf)
4735 if (c > 0x7fffffff)
4737 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4738 "and so cannot be converted to UTF-8\n", c);
4739 goto NEXT_DATA;
4741 q8 += ord2utf8(c, q8);
4743 else
4744 #endif
4746 if (c > 0xffu)
4748 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4749 "and UTF-8 mode is not enabled.\n", c);
4750 fprintf(outfile, "** Truncation will probably give the wrong "
4751 "result.\n");
4753 *q8++ = c;
4756 #endif
4757 #ifdef SUPPORT_PCRE16
4758 if (pcre_mode == PCRE16_MODE)
4760 #ifndef NOUTF
4761 if (use_utf)
4763 if (c > 0x10ffffu)
4765 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4766 "0x10ffff and so cannot be converted to UTF-16\n", c);
4767 goto NEXT_DATA;
4769 else if (c >= 0x10000u)
4771 c-= 0x10000u;
4772 *q16++ = 0xD800 | (c >> 10);
4773 *q16++ = 0xDC00 | (c & 0x3ff);
4775 else
4776 *q16++ = c;
4778 else
4779 #endif
4781 if (c > 0xffffu)
4783 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4784 "and UTF-16 mode is not enabled.\n", c);
4785 fprintf(outfile, "** Truncation will probably give the wrong "
4786 "result.\n");
4789 *q16++ = c;
4792 #endif
4793 #ifdef SUPPORT_PCRE32
4794 if (pcre_mode == PCRE32_MODE)
4796 *q32++ = c;
4798 #endif
4802 /* Reached end of subject string */
4804 #ifdef SUPPORT_PCRE8
4805 if (pcre_mode == PCRE8_MODE)
4807 *q8 = 0;
4808 len = (int)(q8 - (pcre_uint8 *)dbuffer);
4810 #endif
4811 #ifdef SUPPORT_PCRE16
4812 if (pcre_mode == PCRE16_MODE)
4814 *q16 = 0;
4815 len = (int)(q16 - (pcre_uint16 *)dbuffer);
4817 #endif
4818 #ifdef SUPPORT_PCRE32
4819 if (pcre_mode == PCRE32_MODE)
4821 *q32 = 0;
4822 len = (int)(q32 - (pcre_uint32 *)dbuffer);
4824 #endif
4826 /* If we're compiling with explicit valgrind support, mark the data from after
4827 its end to the end of the buffer as unaddressable, so that a read over the end
4828 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4829 If we're not building with valgrind support, at least move the data to the end
4830 of the buffer so that it might at least cause a crash.
4831 If we are using the POSIX interface, we must include the terminating zero. */
4833 bptr = dbuffer;
4835 #if !defined NOPOSIX
4836 if (posix || do_posix)
4838 #ifdef SUPPORT_VALGRIND
4839 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4840 #else
4841 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4842 bptr += dbuffer_size - len - 1;
4843 #endif
4845 else
4846 #endif
4848 #ifdef SUPPORT_VALGRIND
4849 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4850 #else
4851 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4852 #endif
4855 if ((all_use_dfa || use_dfa) && find_match_limit)
4857 printf("**Match limit not relevant for DFA matching: ignored\n");
4858 find_match_limit = 0;
4861 /* Handle matching via the POSIX interface, which does not
4862 support timing or playing with the match limit or callout data. */
4864 #if !defined NOPOSIX
4865 if (posix || do_posix)
4867 int rc;
4868 int eflags = 0;
4869 regmatch_t *pmatch = NULL;
4870 if (use_size_offsets > 0)
4871 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
4872 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
4873 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
4874 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
4876 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
4878 if (rc != 0)
4880 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4881 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4883 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
4885 fprintf(outfile, "Matched with REG_NOSUB\n");
4887 else
4889 size_t i;
4890 for (i = 0; i < (size_t)use_size_offsets; i++)
4892 if (pmatch[i].rm_so >= 0)
4894 fprintf(outfile, "%2d: ", (int)i);
4895 PCHARSV(dbuffer, pmatch[i].rm_so,
4896 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
4897 fprintf(outfile, "\n");
4898 if (do_showcaprest || (i == 0 && do_showrest))
4900 fprintf(outfile, "%2d+ ", (int)i);
4901 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
4902 outfile);
4903 fprintf(outfile, "\n");
4908 free(pmatch);
4909 goto NEXT_DATA;
4912 #endif /* !defined NOPOSIX */
4914 /* Handle matching via the native interface - repeats for /g and /G */
4916 /* Ensure that there is a JIT callback if we want to verify that JIT was
4917 actually used. If jit_stack == NULL, no stack has yet been assigned. */
4919 if (verify_jit && jit_stack == NULL && extra != NULL)
4920 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
4922 for (;; gmatched++) /* Loop for /g or /G */
4924 markptr = NULL;
4925 jit_was_used = FALSE;
4927 if (timeitm > 0)
4929 register int i;
4930 clock_t time_taken;
4931 clock_t start_time = clock();
4933 #if !defined NODFA
4934 if (all_use_dfa || use_dfa)
4936 if ((options & PCRE_DFA_RESTART) != 0)
4938 fprintf(outfile, "Timing DFA restarts is not supported\n");
4939 break;
4941 if (dfa_workspace == NULL)
4942 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4943 for (i = 0; i < timeitm; i++)
4945 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4946 (options | g_notempty), use_offsets, use_size_offsets,
4947 dfa_workspace, DFA_WS_DIMENSION);
4950 else
4951 #endif
4953 for (i = 0; i < timeitm; i++)
4955 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4956 (options | g_notempty), use_offsets, use_size_offsets);
4958 time_taken = clock() - start_time;
4959 fprintf(outfile, "Execute time %.4f milliseconds\n",
4960 (((double)time_taken * 1000.0) / (double)timeitm) /
4961 (double)CLOCKS_PER_SEC);
4964 /* If find_match_limit is set, we want to do repeated matches with
4965 varying limits in order to find the minimum value for the match limit and
4966 for the recursion limit. The match limits are relevant only to the normal
4967 running of pcre_exec(), so disable the JIT optimization. This makes it
4968 possible to run the same set of tests with and without JIT externally
4969 requested. */
4971 if (find_match_limit)
4973 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4974 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4975 extra->flags = 0;
4977 (void)check_match_limit(re, extra, bptr, len, start_offset,
4978 options|g_notempty, use_offsets, use_size_offsets,
4979 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
4980 PCRE_ERROR_MATCHLIMIT, "match()");
4982 count = check_match_limit(re, extra, bptr, len, start_offset,
4983 options|g_notempty, use_offsets, use_size_offsets,
4984 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
4985 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
4988 /* If callout_data is set, use the interface with additional data */
4990 else if (callout_data_set)
4992 if (extra == NULL)
4994 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4995 extra->flags = 0;
4997 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4998 extra->callout_data = &callout_data;
4999 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5000 options | g_notempty, use_offsets, use_size_offsets);
5001 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5004 /* The normal case is just to do the match once, with the default
5005 value of match_limit. */
5007 #if !defined NODFA
5008 else if (all_use_dfa || use_dfa)
5010 if (dfa_workspace == NULL)
5011 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5012 if (dfa_matched++ == 0)
5013 dfa_workspace[0] = -1; /* To catch bad restart */
5014 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5015 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5016 DFA_WS_DIMENSION);
5017 if (count == 0)
5019 fprintf(outfile, "Matched, but too many subsidiary matches\n");
5020 count = use_size_offsets/2;
5023 #endif
5025 else
5027 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5028 options | g_notempty, use_offsets, use_size_offsets);
5029 if (count == 0)
5031 fprintf(outfile, "Matched, but too many substrings\n");
5032 count = use_size_offsets/3;
5036 /* Matched */
5038 if (count >= 0)
5040 int i, maxcount;
5041 void *cnptr, *gnptr;
5043 #if !defined NODFA
5044 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5045 #endif
5046 maxcount = use_size_offsets/3;
5048 /* This is a check against a lunatic return value. */
5050 if (count > maxcount)
5052 fprintf(outfile,
5053 "** PCRE error: returned count %d is too big for offset size %d\n",
5054 count, use_size_offsets);
5055 count = use_size_offsets/3;
5056 if (do_g || do_G)
5058 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5059 do_g = do_G = FALSE; /* Break g/G loop */
5063 /* do_allcaps requests showing of all captures in the pattern, to check
5064 unset ones at the end. */
5066 if (do_allcaps)
5068 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5069 goto SKIP_DATA;
5070 count++; /* Allow for full match */
5071 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5074 /* Output the captured substrings */
5076 for (i = 0; i < count * 2; i += 2)
5078 if (use_offsets[i] < 0)
5080 if (use_offsets[i] != -1)
5081 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5082 use_offsets[i], i);
5083 if (use_offsets[i+1] != -1)
5084 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5085 use_offsets[i+1], i+1);
5086 fprintf(outfile, "%2d: <unset>\n", i/2);
5088 else
5090 fprintf(outfile, "%2d: ", i/2);
5091 PCHARSV(bptr, use_offsets[i],
5092 use_offsets[i+1] - use_offsets[i], outfile);
5093 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5094 fprintf(outfile, "\n");
5095 if (do_showcaprest || (i == 0 && do_showrest))
5097 fprintf(outfile, "%2d+ ", i/2);
5098 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5099 outfile);
5100 fprintf(outfile, "\n");
5105 if (markptr != NULL)
5107 fprintf(outfile, "MK: ");
5108 PCHARSV(markptr, 0, -1, outfile);
5109 fprintf(outfile, "\n");
5112 for (i = 0; i < 32; i++)
5114 if ((copystrings & (1 << i)) != 0)
5116 int rc;
5117 char copybuffer[256];
5118 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5119 copybuffer, sizeof(copybuffer));
5120 if (rc < 0)
5121 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5122 else
5124 fprintf(outfile, "%2dC ", i);
5125 PCHARSV(copybuffer, 0, rc, outfile);
5126 fprintf(outfile, " (%d)\n", rc);
5131 cnptr = copynames;
5132 for (;;)
5134 int rc;
5135 char copybuffer[256];
5137 #ifdef SUPPORT_PCRE32
5138 if (pcre_mode == PCRE32_MODE)
5140 if (*(pcre_uint32 *)cnptr == 0) break;
5142 #endif
5143 #ifdef SUPPORT_PCRE16
5144 if (pcre_mode == PCRE16_MODE)
5146 if (*(pcre_uint16 *)cnptr == 0) break;
5148 #endif
5149 #ifdef SUPPORT_PCRE8
5150 if (pcre_mode == PCRE8_MODE)
5152 if (*(pcre_uint8 *)cnptr == 0) break;
5154 #endif
5156 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5157 cnptr, copybuffer, sizeof(copybuffer));
5159 if (rc < 0)
5161 fprintf(outfile, "copy substring ");
5162 PCHARSV(cnptr, 0, -1, outfile);
5163 fprintf(outfile, " failed %d\n", rc);
5165 else
5167 fprintf(outfile, " C ");
5168 PCHARSV(copybuffer, 0, rc, outfile);
5169 fprintf(outfile, " (%d) ", rc);
5170 PCHARSV(cnptr, 0, -1, outfile);
5171 putc('\n', outfile);
5174 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5177 for (i = 0; i < 32; i++)
5179 if ((getstrings & (1 << i)) != 0)
5181 int rc;
5182 const char *substring;
5183 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5184 if (rc < 0)
5185 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5186 else
5188 fprintf(outfile, "%2dG ", i);
5189 PCHARSV(substring, 0, rc, outfile);
5190 fprintf(outfile, " (%d)\n", rc);
5191 PCRE_FREE_SUBSTRING(substring);
5196 gnptr = getnames;
5197 for (;;)
5199 int rc;
5200 const char *substring;
5202 #ifdef SUPPORT_PCRE32
5203 if (pcre_mode == PCRE32_MODE)
5205 if (*(pcre_uint32 *)gnptr == 0) break;
5207 #endif
5208 #ifdef SUPPORT_PCRE16
5209 if (pcre_mode == PCRE16_MODE)
5211 if (*(pcre_uint16 *)gnptr == 0) break;
5213 #endif
5214 #ifdef SUPPORT_PCRE8
5215 if (pcre_mode == PCRE8_MODE)
5217 if (*(pcre_uint8 *)gnptr == 0) break;
5219 #endif
5221 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5222 gnptr, &substring);
5223 if (rc < 0)
5225 fprintf(outfile, "get substring ");
5226 PCHARSV(gnptr, 0, -1, outfile);
5227 fprintf(outfile, " failed %d\n", rc);
5229 else
5231 fprintf(outfile, " G ");
5232 PCHARSV(substring, 0, rc, outfile);
5233 fprintf(outfile, " (%d) ", rc);
5234 PCHARSV(gnptr, 0, -1, outfile);
5235 PCRE_FREE_SUBSTRING(substring);
5236 putc('\n', outfile);
5239 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5242 if (getlist)
5244 int rc;
5245 const char **stringlist;
5246 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5247 if (rc < 0)
5248 fprintf(outfile, "get substring list failed %d\n", rc);
5249 else
5251 for (i = 0; i < count; i++)
5253 fprintf(outfile, "%2dL ", i);
5254 PCHARSV(stringlist[i], 0, -1, outfile);
5255 putc('\n', outfile);
5257 if (stringlist[i] != NULL)
5258 fprintf(outfile, "string list not terminated by NULL\n");
5259 PCRE_FREE_SUBSTRING_LIST(stringlist);
5264 /* There was a partial match */
5266 else if (count == PCRE_ERROR_PARTIAL)
5268 if (markptr == NULL) fprintf(outfile, "Partial match");
5269 else
5271 fprintf(outfile, "Partial match, mark=");
5272 PCHARSV(markptr, 0, -1, outfile);
5274 if (use_size_offsets > 1)
5276 fprintf(outfile, ": ");
5277 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5278 outfile);
5280 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5281 fprintf(outfile, "\n");
5282 break; /* Out of the /g loop */
5285 /* Failed to match. If this is a /g or /G loop and we previously set
5286 g_notempty after a null match, this is not necessarily the end. We want
5287 to advance the start offset, and continue. We won't be at the end of the
5288 string - that was checked before setting g_notempty.
5290 Complication arises in the case when the newline convention is "any",
5291 "crlf", or "anycrlf". If the previous match was at the end of a line
5292 terminated by CRLF, an advance of one character just passes the \r,
5293 whereas we should prefer the longer newline sequence, as does the code in
5294 pcre_exec(). Fudge the offset value to achieve this. We check for a
5295 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5296 find the default.
5298 Otherwise, in the case of UTF-8 matching, the advance must be one
5299 character, not one byte. */
5301 else
5303 if (g_notempty != 0)
5305 int onechar = 1;
5306 unsigned int obits = REAL_PCRE_OPTIONS(re);
5307 use_offsets[0] = start_offset;
5308 if ((obits & PCRE_NEWLINE_BITS) == 0)
5310 int d;
5311 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5312 /* Note that these values are always the ASCII ones, even in
5313 EBCDIC environments. CR = 13, NL = 10. */
5314 obits = (d == 13)? PCRE_NEWLINE_CR :
5315 (d == 10)? PCRE_NEWLINE_LF :
5316 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5317 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5318 (d == -1)? PCRE_NEWLINE_ANY : 0;
5320 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5321 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5322 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5324 start_offset < len - 1 && (
5325 #ifdef SUPPORT_PCRE8
5326 (pcre_mode == PCRE8_MODE &&
5327 bptr[start_offset] == '\r' &&
5328 bptr[start_offset + 1] == '\n') ||
5329 #endif
5330 #ifdef SUPPORT_PCRE16
5331 (pcre_mode == PCRE16_MODE &&
5332 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5333 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5334 #endif
5335 #ifdef SUPPORT_PCRE32
5336 (pcre_mode == PCRE32_MODE &&
5337 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5338 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5339 #endif
5341 onechar++;
5342 else if (use_utf)
5344 while (start_offset + onechar < len)
5346 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5347 onechar++;
5350 use_offsets[1] = start_offset + onechar;
5352 else
5354 switch(count)
5356 case PCRE_ERROR_NOMATCH:
5357 if (gmatched == 0)
5359 if (markptr == NULL)
5361 fprintf(outfile, "No match");
5363 else
5365 fprintf(outfile, "No match, mark = ");
5366 PCHARSV(markptr, 0, -1, outfile);
5368 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5369 putc('\n', outfile);
5371 break;
5373 case PCRE_ERROR_BADUTF8:
5374 case PCRE_ERROR_SHORTUTF8:
5375 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5376 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5377 8 * CHAR_SIZE);
5378 if (use_size_offsets >= 2)
5379 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5380 use_offsets[1]);
5381 fprintf(outfile, "\n");
5382 break;
5384 case PCRE_ERROR_BADUTF8_OFFSET:
5385 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5386 8 * CHAR_SIZE);
5387 break;
5389 default:
5390 if (count < 0 &&
5391 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5392 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5393 else
5394 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5395 break;
5398 break; /* Out of the /g loop */
5402 /* If not /g or /G we are done */
5404 if (!do_g && !do_G) break;
5406 /* If we have matched an empty string, first check to see if we are at
5407 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5408 Perl's /g option does. This turns out to be rather cunning. First we set
5409 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5410 same point. If this fails (picked up above) we advance to the next
5411 character. */
5413 g_notempty = 0;
5415 if (use_offsets[0] == use_offsets[1])
5417 if (use_offsets[0] == len) break;
5418 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5421 /* For /g, update the start offset, leaving the rest alone */
5423 if (do_g) start_offset = use_offsets[1];
5425 /* For /G, update the pointer and length */
5427 else
5429 bptr += use_offsets[1] * CHAR_SIZE;
5430 len -= use_offsets[1];
5432 } /* End of loop for /g and /G */
5434 NEXT_DATA: continue;
5435 } /* End of loop for data lines */
5437 CONTINUE:
5439 #if !defined NOPOSIX
5440 if (posix || do_posix) regfree(&preg);
5441 #endif
5443 if (re != NULL) new_free(re);
5444 if (extra != NULL)
5446 PCRE_FREE_STUDY(extra);
5448 if (locale_set)
5450 new_free((void *)tables);
5451 setlocale(LC_CTYPE, "C");
5452 locale_set = 0;
5454 if (jit_stack != NULL)
5456 PCRE_JIT_STACK_FREE(jit_stack);
5457 jit_stack = NULL;
5461 if (infile == stdin) fprintf(outfile, "\n");
5463 EXIT:
5465 if (infile != NULL && infile != stdin) fclose(infile);
5466 if (outfile != NULL && outfile != stdout) fclose(outfile);
5468 free(buffer);
5469 free(dbuffer);
5470 free(pbuffer);
5471 free(offsets);
5473 #ifdef SUPPORT_PCRE16
5474 if (buffer16 != NULL) free(buffer16);
5475 #endif
5476 #ifdef SUPPORT_PCRE32
5477 if (buffer32 != NULL) free(buffer32);
5478 #endif
5480 #if !defined NODFA
5481 if (dfa_workspace != NULL)
5482 free(dfa_workspace);
5483 #endif
5485 return yield;
5488 /* End of pcretest.c */