tgupdate: merge pcreposix-compat base into pcreposix-compat
[pcreposix-compat.git] / pcre_jit_compile.c
blob21408d57856de2a6bac3a54470c3984b9c493033
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
47 #include "pcre_internal.h"
49 #if !defined SUPPORT_JIT
51 /* Stubs for clients compiled against pcre_jit_exec (and friends)
52 so they gracefully fall back to non-JIT. */
54 #if defined COMPILE_PCRE8
55 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
56 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
57 PCRE_SPTR subject, int length, int start_offset, int options,
58 int *offsets, int offset_count, pcre_jit_stack *stack)
59 #elif defined COMPILE_PCRE16
60 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
61 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
62 PCRE_SPTR16 subject, int length, int start_offset, int options,
63 int *offsets, int offset_count, pcre16_jit_stack *stack)
64 #elif defined COMPILE_PCRE32
65 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
66 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
67 PCRE_SPTR32 subject, int length, int start_offset, int options,
68 int *offsets, int offset_count, pcre32_jit_stack *stack)
69 #endif
71 (void)stack;
72 return
73 #if defined COMPILE_PCRE8
74 pcre_exec
75 #elif defined COMPILE_PCRE16
76 pcre16_exec
77 #elif defined COMPILE_PCRE32
78 pcre32_exec
79 #endif
80 (argument_re, extra_data, subject, length, start_offset, options, offsets, offset_count);
83 #endif /* !SUPPORT_JIT stubs */
85 #if defined SUPPORT_JIT
87 /* All-in-one: Since we use the JIT compiler only from here,
88 we just include it. This way we don't need to touch the build
89 system files. */
91 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
92 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
93 #define SLJIT_CONFIG_AUTO 1
94 #define SLJIT_CONFIG_STATIC 1
95 #define SLJIT_VERBOSE 0
96 #define SLJIT_DEBUG 0
98 #include "sljit/sljitLir.c"
100 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
101 #error Unsupported architecture
102 #endif
104 /* Defines for debugging purposes. */
106 /* 1 - Use unoptimized capturing brackets.
107 2 - Enable capture_last_ptr (includes option 1). */
108 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
110 /* 1 - Always have a control head. */
111 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
113 /* Allocate memory for the regex stack on the real machine stack.
114 Fast, but limited size. */
115 #define MACHINE_STACK_SIZE 32768
117 /* Growth rate for stack allocated by the OS. Should be a multiple
118 of the page size. */
119 #define STACK_GROWTH_RATE 8192
121 /* Enable to check that the allocation could destroy temporaries. */
122 #if defined SLJIT_DEBUG && SLJIT_DEBUG
123 #define DESTROY_REGISTERS 1
124 #endif
127 Short summary about the backtracking mechanism employed by the jit code generator:
129 The code generator follows the recursive nature of the PERL compatible regular
130 expressions. The basic blocks of regular expressions are condition checkers
131 which execute different commands depending on the result of the condition check.
132 The relationship between the operators can be horizontal (concatenation) and
133 vertical (sub-expression) (See struct backtrack_common for more details).
135 'ab' - 'a' and 'b' regexps are concatenated
136 'a+' - 'a' is the sub-expression of the '+' operator
138 The condition checkers are boolean (true/false) checkers. Machine code is generated
139 for the checker itself and for the actions depending on the result of the checker.
140 The 'true' case is called the matching path (expected path), and the other is called
141 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
142 branches on the matching path.
144 Greedy star operator (*) :
145 Matching path: match happens.
146 Backtrack path: match failed.
147 Non-greedy star operator (*?) :
148 Matching path: no need to perform a match.
149 Backtrack path: match is required.
151 The following example shows the code generated for a capturing bracket
152 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
153 we have the following regular expression:
155 A(B|C)D
157 The generated code will be the following:
159 A matching path
160 '(' matching path (pushing arguments to the stack)
161 B matching path
162 ')' matching path (pushing arguments to the stack)
163 D matching path
164 return with successful match
166 D backtrack path
167 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
168 B backtrack path
169 C expected path
170 jump to D matching path
171 C backtrack path
172 A backtrack path
174 Notice that the order of the backtrack code paths is the opposite of the fast
175 code paths. In this way the topmost value on the stack always belongs
176 to the current backtrack code path. The backtrack path must check
177 whether there is a next alternative. If so, it needs to jump back to
178 the matching path eventually. Otherwise it needs to clear out its own stack
179 frame and continue the execution on the backtrack code paths.
183 Saved stack frames:
185 Atomic blocks and asserts require reloading the values of private data
186 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
187 are not necessarily known at compile time, thus we need a dynamic restore
188 mechanism.
190 The stack frames are stored in a chain list, and have the following format:
191 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
193 Thus we can restore the private data to a particular point in the stack.
196 typedef struct jit_arguments {
197 /* Pointers first. */
198 struct sljit_stack *stack;
199 const pcre_uchar *str;
200 const pcre_uchar *begin;
201 const pcre_uchar *end;
202 int *offsets;
203 pcre_uchar *mark_ptr;
204 void *callout_data;
205 /* Everything else after. */
206 sljit_u32 limit_match;
207 int real_offset_count;
208 int offset_count;
209 sljit_u8 notbol;
210 sljit_u8 noteol;
211 sljit_u8 notempty;
212 sljit_u8 notempty_atstart;
213 } jit_arguments;
215 typedef struct executable_functions {
216 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
217 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
218 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
219 PUBL(jit_callback) callback;
220 void *userdata;
221 sljit_u32 top_bracket;
222 sljit_u32 limit_match;
223 } executable_functions;
/* Singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
/* Singly linked list of stubs; each entry pairs a jump ('start') with a
label ('quit'). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
236 typedef struct label_addr_list {
237 struct sljit_label *label;
238 sljit_uw *update_addr;
239 struct label_addr_list *next;
240 } label_addr_list;
/* Negative marker values used in place of a frame size. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
/* Entry kinds for control verb handling. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
252 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
254 /* The following structure is the key data type for the recursive
255 code generator. It is allocated by compile_matchingpath, and contains
256 the arguments for compile_backtrackingpath. Must be the first member
257 of its descendants. */
258 typedef struct backtrack_common {
259 /* Concatenation stack. */
260 struct backtrack_common *prev;
261 jump_list *nextbacktracks;
262 /* Internal stack (for component operators). */
263 struct backtrack_common *top;
264 jump_list *topbacktracks;
265 /* Opcode pointer. */
266 pcre_uchar *cc;
267 } backtrack_common;
269 typedef struct assert_backtrack {
270 backtrack_common common;
271 jump_list *condfailed;
272 /* Less than 0 if a frame is not needed. */
273 int framesize;
274 /* Points to our private memory word on the stack. */
275 int private_data_ptr;
276 /* For iterators. */
277 struct sljit_label *matchingpath;
278 } assert_backtrack;
280 typedef struct bracket_backtrack {
281 backtrack_common common;
282 /* Where to coninue if an alternative is successfully matched. */
283 struct sljit_label *alternative_matchingpath;
284 /* For rmin and rmax iterators. */
285 struct sljit_label *recursive_matchingpath;
286 /* For greedy ? operator. */
287 struct sljit_label *zero_matchingpath;
288 /* Contains the branches of a failed condition. */
289 union {
290 /* Both for OP_COND, OP_SCOND. */
291 jump_list *condfailed;
292 assert_backtrack *assert;
293 /* For OP_ONCE. Less than 0 if not needed. */
294 int framesize;
295 } u;
296 /* Points to our private memory word on the stack. */
297 int private_data_ptr;
298 } bracket_backtrack;
300 typedef struct bracketpos_backtrack {
301 backtrack_common common;
302 /* Points to our private memory word on the stack. */
303 int private_data_ptr;
304 /* Reverting stack is needed. */
305 int framesize;
306 /* Allocated stack size. */
307 int stacksize;
308 } bracketpos_backtrack;
310 typedef struct braminzero_backtrack {
311 backtrack_common common;
312 struct sljit_label *matchingpath;
313 } braminzero_backtrack;
315 typedef struct char_iterator_backtrack {
316 backtrack_common common;
317 /* Next iteration. */
318 struct sljit_label *matchingpath;
319 union {
320 jump_list *backtracks;
321 struct {
322 unsigned int othercasebit;
323 pcre_uchar chr;
324 BOOL enabled;
325 } charpos;
326 } u;
327 } char_iterator_backtrack;
329 typedef struct ref_iterator_backtrack {
330 backtrack_common common;
331 /* Next iteration. */
332 struct sljit_label *matchingpath;
333 } ref_iterator_backtrack;
335 typedef struct recurse_entry {
336 struct recurse_entry *next;
337 /* Contains the function entry. */
338 struct sljit_label *entry;
339 /* Collects the calls until the function is not created. */
340 jump_list *calls;
341 /* Points to the starting opcode. */
342 sljit_sw start;
343 } recurse_entry;
345 typedef struct recurse_backtrack {
346 backtrack_common common;
347 BOOL inlined_pattern;
348 } recurse_backtrack;
350 #define OP_THEN_TRAP OP_TABLE_LENGTH
352 typedef struct then_trap_backtrack {
353 backtrack_common common;
354 /* If then_trap is not NULL, this structure contains the real
355 then_trap for the backtracking path. */
356 struct then_trap_backtrack *then_trap;
357 /* Points to the starting opcode. */
358 sljit_sw start;
359 /* Exit point for the then opcodes of this alternative. */
360 jump_list *quit;
361 /* Frame size of the current alternative. */
362 int framesize;
363 } then_trap_backtrack;
365 #define MAX_RANGE_SIZE 4
367 typedef struct compiler_common {
368 /* The sljit ceneric compiler. */
369 struct sljit_compiler *compiler;
370 /* First byte code. */
371 pcre_uchar *start;
372 /* Maps private data offset to each opcode. */
373 sljit_s32 *private_data_ptrs;
374 /* Chain list of read-only data ptrs. */
375 void *read_only_data_head;
376 /* Tells whether the capturing bracket is optimized. */
377 sljit_u8 *optimized_cbracket;
378 /* Tells whether the starting offset is a target of then. */
379 sljit_u8 *then_offsets;
380 /* Current position where a THEN must jump. */
381 then_trap_backtrack *then_trap;
382 /* Starting offset of private data for capturing brackets. */
383 sljit_s32 cbra_ptr;
384 /* Output vector starting point. Must be divisible by 2. */
385 sljit_s32 ovector_start;
386 /* Points to the starting character of the current match. */
387 sljit_s32 start_ptr;
388 /* Last known position of the requested byte. */
389 sljit_s32 req_char_ptr;
390 /* Head of the last recursion. */
391 sljit_s32 recursive_head_ptr;
392 /* First inspected character for partial matching.
393 (Needed for avoiding zero length partial matches.) */
394 sljit_s32 start_used_ptr;
395 /* Starting pointer for partial soft matches. */
396 sljit_s32 hit_start;
397 /* Pointer of the match end position. */
398 sljit_s32 match_end_ptr;
399 /* Points to the marked string. */
400 sljit_s32 mark_ptr;
401 /* Recursive control verb management chain. */
402 sljit_s32 control_head_ptr;
403 /* Points to the last matched capture block index. */
404 sljit_s32 capture_last_ptr;
405 /* Fast forward skipping byte code pointer. */
406 pcre_uchar *fast_forward_bc_ptr;
407 /* Locals used by fast fail optimization. */
408 sljit_s32 fast_fail_start_ptr;
409 sljit_s32 fast_fail_end_ptr;
411 /* Flipped and lower case tables. */
412 const sljit_u8 *fcc;
413 sljit_sw lcc;
414 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
415 int mode;
416 /* TRUE, when minlength is greater than 0. */
417 BOOL might_be_empty;
418 /* \K is found in the pattern. */
419 BOOL has_set_som;
420 /* (*SKIP:arg) is found in the pattern. */
421 BOOL has_skip_arg;
422 /* (*THEN) is found in the pattern. */
423 BOOL has_then;
424 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
425 BOOL has_skip_in_assert_back;
426 /* Currently in recurse or negative assert. */
427 BOOL local_exit;
428 /* Currently in a positive assert. */
429 BOOL positive_assert;
430 /* Newline control. */
431 int nltype;
432 sljit_u32 nlmax;
433 sljit_u32 nlmin;
434 int newline;
435 int bsr_nltype;
436 sljit_u32 bsr_nlmax;
437 sljit_u32 bsr_nlmin;
438 /* Dollar endonly. */
439 int endonly;
440 /* Tables. */
441 sljit_sw ctypes;
442 /* Named capturing brackets. */
443 pcre_uchar *name_table;
444 sljit_sw name_count;
445 sljit_sw name_entry_size;
447 /* Labels and jump lists. */
448 struct sljit_label *partialmatchlabel;
449 struct sljit_label *quit_label;
450 struct sljit_label *forced_quit_label;
451 struct sljit_label *accept_label;
452 struct sljit_label *ff_newline_shortcut;
453 stub_list *stubs;
454 label_addr_list *label_addrs;
455 recurse_entry *entries;
456 recurse_entry *currententry;
457 jump_list *partialmatch;
458 jump_list *quit;
459 jump_list *positive_assert_quit;
460 jump_list *forced_quit;
461 jump_list *accept;
462 jump_list *calllimit;
463 jump_list *stackalloc;
464 jump_list *revertframes;
465 jump_list *wordboundary;
466 jump_list *anynewline;
467 jump_list *hspace;
468 jump_list *vspace;
469 jump_list *casefulcmp;
470 jump_list *caselesscmp;
471 jump_list *reset_match;
472 BOOL jscript_compat;
473 #ifdef SUPPORT_UTF
474 BOOL utf;
475 #ifdef SUPPORT_UCP
476 BOOL use_ucp;
477 jump_list *getucd;
478 #endif
479 #ifdef COMPILE_PCRE8
480 jump_list *utfreadchar;
481 jump_list *utfreadchar16;
482 jump_list *utfreadtype8;
483 #endif
484 #endif /* SUPPORT_UTF */
485 } compiler_common;
/* For byte_sequence_compare. The 'c' and 'oc' unions exist only when
SLJIT_UNALIGNED is defined and non-zero; each overlays the same storage as
an int, a 16 bit value, and an array of character units whose element
count depends on the character width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
/* Undefine sljit macros. */
#undef CMP

/* Used for accessing the elements of the stack: converts an element
index into a byte offset (index times sizeof(sljit_sw)). */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))
/* Register assignment. When sljit prefers SLJIT_R3 as the shift register,
TMP2 and TMP3 swap places so that TMP2 is the preferred shift register. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4
/* Local space layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at address cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
571 #if defined COMPILE_PCRE8
572 #define MOV_UCHAR SLJIT_MOV_U8
573 #elif defined COMPILE_PCRE16
574 #define MOV_UCHAR SLJIT_MOV_U16
575 #elif defined COMPILE_PCRE32
576 #define MOV_UCHAR SLJIT_MOV_U32
577 #else
578 #error Unsupported compiling mode
579 #endif
/* Shortcuts. Every macro below except DEFINE_COMPILER and SET_LABEL
expects a local variable named 'compiler', which DEFINE_COMPILER declares
from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR 888
611 static pcre_uchar *bracketend(pcre_uchar *cc)
613 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
614 do cc += GET(cc, 1); while (*cc == OP_ALT);
615 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
616 cc += 1 + LINK_SIZE;
617 return cc;
620 static int no_alternatives(pcre_uchar *cc)
622 int count = 0;
623 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
626 cc += GET(cc, 1);
627 count++;
629 while (*cc == OP_ALT);
630 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
631 return count;
634 /* Functions which might need modification for all newly supported opcodes:
635 next_opcode
636 check_opcode_types
637 set_private_data_ptrs
638 get_framesize
639 init_frame
640 get_private_data_copy_length
641 copy_private_data
642 compile_matchingpath
643 compile_backtrackingpath
646 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
648 SLJIT_UNUSED_ARG(common);
649 switch(*cc)
651 case OP_SOD:
652 case OP_SOM:
653 case OP_SET_SOM:
654 case OP_NOT_WORD_BOUNDARY:
655 case OP_WORD_BOUNDARY:
656 case OP_NOT_DIGIT:
657 case OP_DIGIT:
658 case OP_NOT_WHITESPACE:
659 case OP_WHITESPACE:
660 case OP_NOT_WORDCHAR:
661 case OP_WORDCHAR:
662 case OP_ANY:
663 case OP_ALLANY:
664 case OP_NOTPROP:
665 case OP_PROP:
666 case OP_ANYNL:
667 case OP_NOT_HSPACE:
668 case OP_HSPACE:
669 case OP_NOT_VSPACE:
670 case OP_VSPACE:
671 case OP_EXTUNI:
672 case OP_EODN:
673 case OP_EOD:
674 case OP_CIRC:
675 case OP_CIRCM:
676 case OP_DOLL:
677 case OP_DOLLM:
678 case OP_CRSTAR:
679 case OP_CRMINSTAR:
680 case OP_CRPLUS:
681 case OP_CRMINPLUS:
682 case OP_CRQUERY:
683 case OP_CRMINQUERY:
684 case OP_CRRANGE:
685 case OP_CRMINRANGE:
686 case OP_CRPOSSTAR:
687 case OP_CRPOSPLUS:
688 case OP_CRPOSQUERY:
689 case OP_CRPOSRANGE:
690 case OP_CLASS:
691 case OP_NCLASS:
692 case OP_REF:
693 case OP_REFI:
694 case OP_DNREF:
695 case OP_DNREFI:
696 case OP_RECURSE:
697 case OP_CALLOUT:
698 case OP_ALT:
699 case OP_KET:
700 case OP_KETRMAX:
701 case OP_KETRMIN:
702 case OP_KETRPOS:
703 case OP_REVERSE:
704 case OP_ASSERT:
705 case OP_ASSERT_NOT:
706 case OP_ASSERTBACK:
707 case OP_ASSERTBACK_NOT:
708 case OP_ONCE:
709 case OP_ONCE_NC:
710 case OP_BRA:
711 case OP_BRAPOS:
712 case OP_CBRA:
713 case OP_CBRAPOS:
714 case OP_COND:
715 case OP_SBRA:
716 case OP_SBRAPOS:
717 case OP_SCBRA:
718 case OP_SCBRAPOS:
719 case OP_SCOND:
720 case OP_CREF:
721 case OP_DNCREF:
722 case OP_RREF:
723 case OP_DNRREF:
724 case OP_DEF:
725 case OP_BRAZERO:
726 case OP_BRAMINZERO:
727 case OP_BRAPOSZERO:
728 case OP_PRUNE:
729 case OP_SKIP:
730 case OP_THEN:
731 case OP_COMMIT:
732 case OP_FAIL:
733 case OP_ACCEPT:
734 case OP_ASSERT_ACCEPT:
735 case OP_CLOSE:
736 case OP_SKIPZERO:
737 return cc + PRIV(OP_lengths)[*cc];
739 case OP_CHAR:
740 case OP_CHARI:
741 case OP_NOT:
742 case OP_NOTI:
743 case OP_STAR:
744 case OP_MINSTAR:
745 case OP_PLUS:
746 case OP_MINPLUS:
747 case OP_QUERY:
748 case OP_MINQUERY:
749 case OP_UPTO:
750 case OP_MINUPTO:
751 case OP_EXACT:
752 case OP_POSSTAR:
753 case OP_POSPLUS:
754 case OP_POSQUERY:
755 case OP_POSUPTO:
756 case OP_STARI:
757 case OP_MINSTARI:
758 case OP_PLUSI:
759 case OP_MINPLUSI:
760 case OP_QUERYI:
761 case OP_MINQUERYI:
762 case OP_UPTOI:
763 case OP_MINUPTOI:
764 case OP_EXACTI:
765 case OP_POSSTARI:
766 case OP_POSPLUSI:
767 case OP_POSQUERYI:
768 case OP_POSUPTOI:
769 case OP_NOTSTAR:
770 case OP_NOTMINSTAR:
771 case OP_NOTPLUS:
772 case OP_NOTMINPLUS:
773 case OP_NOTQUERY:
774 case OP_NOTMINQUERY:
775 case OP_NOTUPTO:
776 case OP_NOTMINUPTO:
777 case OP_NOTEXACT:
778 case OP_NOTPOSSTAR:
779 case OP_NOTPOSPLUS:
780 case OP_NOTPOSQUERY:
781 case OP_NOTPOSUPTO:
782 case OP_NOTSTARI:
783 case OP_NOTMINSTARI:
784 case OP_NOTPLUSI:
785 case OP_NOTMINPLUSI:
786 case OP_NOTQUERYI:
787 case OP_NOTMINQUERYI:
788 case OP_NOTUPTOI:
789 case OP_NOTMINUPTOI:
790 case OP_NOTEXACTI:
791 case OP_NOTPOSSTARI:
792 case OP_NOTPOSPLUSI:
793 case OP_NOTPOSQUERYI:
794 case OP_NOTPOSUPTOI:
795 cc += PRIV(OP_lengths)[*cc];
796 #ifdef SUPPORT_UTF
797 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
798 #endif
799 return cc;
801 /* Special cases. */
802 case OP_TYPESTAR:
803 case OP_TYPEMINSTAR:
804 case OP_TYPEPLUS:
805 case OP_TYPEMINPLUS:
806 case OP_TYPEQUERY:
807 case OP_TYPEMINQUERY:
808 case OP_TYPEUPTO:
809 case OP_TYPEMINUPTO:
810 case OP_TYPEEXACT:
811 case OP_TYPEPOSSTAR:
812 case OP_TYPEPOSPLUS:
813 case OP_TYPEPOSQUERY:
814 case OP_TYPEPOSUPTO:
815 return cc + PRIV(OP_lengths)[*cc] - 1;
817 case OP_ANYBYTE:
818 #ifdef SUPPORT_UTF
819 if (common->utf) return NULL;
820 #endif
821 return cc + 1;
823 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
824 case OP_XCLASS:
825 return cc + GET(cc, 1);
826 #endif
828 case OP_MARK:
829 case OP_PRUNE_ARG:
830 case OP_SKIP_ARG:
831 case OP_THEN_ARG:
832 return cc + 1 + 2 + cc[1];
834 default:
835 /* All opcodes are supported now! */
836 SLJIT_UNREACHABLE();
837 return NULL;
841 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
843 int count;
844 pcre_uchar *slot;
845 pcre_uchar *assert_back_end = cc - 1;
847 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
848 while (cc < ccend)
850 switch(*cc)
852 case OP_SET_SOM:
853 common->has_set_som = TRUE;
854 common->might_be_empty = TRUE;
855 cc += 1;
856 break;
858 case OP_REF:
859 case OP_REFI:
860 common->optimized_cbracket[GET2(cc, 1)] = 0;
861 cc += 1 + IMM2_SIZE;
862 break;
864 case OP_CBRAPOS:
865 case OP_SCBRAPOS:
866 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
867 cc += 1 + LINK_SIZE + IMM2_SIZE;
868 break;
870 case OP_COND:
871 case OP_SCOND:
872 /* Only AUTO_CALLOUT can insert this opcode. We do
873 not intend to support this case. */
874 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
875 return FALSE;
876 cc += 1 + LINK_SIZE;
877 break;
879 case OP_CREF:
880 common->optimized_cbracket[GET2(cc, 1)] = 0;
881 cc += 1 + IMM2_SIZE;
882 break;
884 case OP_DNREF:
885 case OP_DNREFI:
886 case OP_DNCREF:
887 count = GET2(cc, 1 + IMM2_SIZE);
888 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
889 while (count-- > 0)
891 common->optimized_cbracket[GET2(slot, 0)] = 0;
892 slot += common->name_entry_size;
894 cc += 1 + 2 * IMM2_SIZE;
895 break;
897 case OP_RECURSE:
898 /* Set its value only once. */
899 if (common->recursive_head_ptr == 0)
901 common->recursive_head_ptr = common->ovector_start;
902 common->ovector_start += sizeof(sljit_sw);
904 cc += 1 + LINK_SIZE;
905 break;
907 case OP_CALLOUT:
908 if (common->capture_last_ptr == 0)
910 common->capture_last_ptr = common->ovector_start;
911 common->ovector_start += sizeof(sljit_sw);
913 cc += 2 + 2 * LINK_SIZE;
914 break;
916 case OP_ASSERTBACK:
917 slot = bracketend(cc);
918 if (slot > assert_back_end)
919 assert_back_end = slot;
920 cc += 1 + LINK_SIZE;
921 break;
923 case OP_THEN_ARG:
924 common->has_then = TRUE;
925 common->control_head_ptr = 1;
926 /* Fall through. */
928 case OP_PRUNE_ARG:
929 case OP_MARK:
930 if (common->mark_ptr == 0)
932 common->mark_ptr = common->ovector_start;
933 common->ovector_start += sizeof(sljit_sw);
935 cc += 1 + 2 + cc[1];
936 break;
938 case OP_THEN:
939 common->has_then = TRUE;
940 common->control_head_ptr = 1;
941 cc += 1;
942 break;
944 case OP_SKIP:
945 if (cc < assert_back_end)
946 common->has_skip_in_assert_back = TRUE;
947 cc += 1;
948 break;
950 case OP_SKIP_ARG:
951 common->control_head_ptr = 1;
952 common->has_skip_arg = TRUE;
953 if (cc < assert_back_end)
954 common->has_skip_in_assert_back = TRUE;
955 cc += 1 + 2 + cc[1];
956 break;
958 default:
959 cc = next_opcode(common, cc);
960 if (cc == NULL)
961 return FALSE;
962 break;
965 return TRUE;
968 static BOOL is_accelerated_repeat(pcre_uchar *cc)
970 switch(*cc)
972 case OP_TYPESTAR:
973 case OP_TYPEMINSTAR:
974 case OP_TYPEPLUS:
975 case OP_TYPEMINPLUS:
976 case OP_TYPEPOSSTAR:
977 case OP_TYPEPOSPLUS:
978 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
980 case OP_STAR:
981 case OP_MINSTAR:
982 case OP_PLUS:
983 case OP_MINPLUS:
984 case OP_POSSTAR:
985 case OP_POSPLUS:
987 case OP_STARI:
988 case OP_MINSTARI:
989 case OP_PLUSI:
990 case OP_MINPLUSI:
991 case OP_POSSTARI:
992 case OP_POSPLUSI:
994 case OP_NOTSTAR:
995 case OP_NOTMINSTAR:
996 case OP_NOTPLUS:
997 case OP_NOTMINPLUS:
998 case OP_NOTPOSSTAR:
999 case OP_NOTPOSPLUS:
1001 case OP_NOTSTARI:
1002 case OP_NOTMINSTARI:
1003 case OP_NOTPLUSI:
1004 case OP_NOTMINPLUSI:
1005 case OP_NOTPOSSTARI:
1006 case OP_NOTPOSPLUSI:
1007 return TRUE;
1009 case OP_CLASS:
1010 case OP_NCLASS:
1011 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1012 case OP_XCLASS:
1013 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
1014 #else
1015 cc += (1 + (32 / sizeof(pcre_uchar)));
1016 #endif
1018 switch(*cc)
1020 case OP_CRSTAR:
1021 case OP_CRMINSTAR:
1022 case OP_CRPLUS:
1023 case OP_CRMINPLUS:
1024 case OP_CRPOSSTAR:
1025 case OP_CRPOSPLUS:
1026 return TRUE;
1028 break;
1030 return FALSE;
1033 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
1035 pcre_uchar *cc = common->start;
1036 pcre_uchar *end;
1038 /* Skip not repeated brackets. */
1039 while (TRUE)
1041 switch(*cc)
1043 case OP_SOD:
1044 case OP_SOM:
1045 case OP_SET_SOM:
1046 case OP_NOT_WORD_BOUNDARY:
1047 case OP_WORD_BOUNDARY:
1048 case OP_EODN:
1049 case OP_EOD:
1050 case OP_CIRC:
1051 case OP_CIRCM:
1052 case OP_DOLL:
1053 case OP_DOLLM:
1054 /* Zero width assertions. */
1055 cc++;
1056 continue;
1059 if (*cc != OP_BRA && *cc != OP_CBRA)
1060 break;
1062 end = cc + GET(cc, 1);
1063 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1064 return FALSE;
1065 if (*cc == OP_CBRA)
1067 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1068 return FALSE;
1069 cc += IMM2_SIZE;
1071 cc += 1 + LINK_SIZE;
1074 if (is_accelerated_repeat(cc))
1076 common->fast_forward_bc_ptr = cc;
1077 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1078 *private_data_start += sizeof(sljit_sw);
1079 return TRUE;
1081 return FALSE;
1084 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1086 pcre_uchar *next_alt;
1088 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1090 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1091 return;
1093 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1094 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1095 return;
1099 next_alt = cc + GET(cc, 1);
1101 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1103 while (TRUE)
1105 switch(*cc)
1107 case OP_SOD:
1108 case OP_SOM:
1109 case OP_SET_SOM:
1110 case OP_NOT_WORD_BOUNDARY:
1111 case OP_WORD_BOUNDARY:
1112 case OP_EODN:
1113 case OP_EOD:
1114 case OP_CIRC:
1115 case OP_CIRCM:
1116 case OP_DOLL:
1117 case OP_DOLLM:
1118 /* Zero width assertions. */
1119 cc++;
1120 continue;
1122 break;
1125 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1126 detect_fast_fail(common, cc, private_data_start, depth - 1);
1128 if (is_accelerated_repeat(cc))
1130 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1132 if (common->fast_fail_start_ptr == 0)
1133 common->fast_fail_start_ptr = *private_data_start;
1135 *private_data_start += sizeof(sljit_sw);
1136 common->fast_fail_end_ptr = *private_data_start;
1138 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1139 return;
1142 cc = next_alt;
1144 while (*cc == OP_ALT);
1147 static int get_class_iterator_size(pcre_uchar *cc)
1149 sljit_u32 min;
1150 sljit_u32 max;
1151 switch(*cc)
1153 case OP_CRSTAR:
1154 case OP_CRPLUS:
1155 return 2;
1157 case OP_CRMINSTAR:
1158 case OP_CRMINPLUS:
1159 case OP_CRQUERY:
1160 case OP_CRMINQUERY:
1161 return 1;
1163 case OP_CRRANGE:
1164 case OP_CRMINRANGE:
1165 min = GET2(cc, 1);
1166 max = GET2(cc, 1 + IMM2_SIZE);
1167 if (max == 0)
1168 return (*cc == OP_CRRANGE) ? 2 : 1;
1169 max -= min;
1170 if (max > 2)
1171 max = 2;
1172 return max;
1174 default:
1175 return 0;
1179 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1181 pcre_uchar *end = bracketend(begin);
1182 pcre_uchar *next;
1183 pcre_uchar *next_end;
1184 pcre_uchar *max_end;
1185 pcre_uchar type;
1186 sljit_sw length = end - begin;
1187 int min, max, i;
1189 /* Detect fixed iterations first. */
1190 if (end[-(1 + LINK_SIZE)] != OP_KET)
1191 return FALSE;
1193 /* Already detected repeat. */
1194 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1195 return TRUE;
1197 next = end;
1198 min = 1;
1199 while (1)
1201 if (*next != *begin)
1202 break;
1203 next_end = bracketend(next);
1204 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1205 break;
1206 next = next_end;
1207 min++;
1210 if (min == 2)
1211 return FALSE;
1213 max = 0;
1214 max_end = next;
1215 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1217 type = *next;
1218 while (1)
1220 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1221 break;
1222 next_end = bracketend(next + 2 + LINK_SIZE);
1223 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1224 break;
1225 next = next_end;
1226 max++;
1229 if (next[0] == type && next[1] == *begin && max >= 1)
1231 next_end = bracketend(next + 1);
1232 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1234 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1235 if (*next_end != OP_KET)
1236 break;
1238 if (i == max)
1240 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1241 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1242 /* +2 the original and the last. */
1243 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1244 if (min == 1)
1245 return TRUE;
1246 min--;
1247 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1253 if (min >= 3)
1255 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1256 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1257 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1258 return TRUE;
1261 return FALSE;
/* Case label groups shared by set_private_data_ptrs(),
get_private_data_copy_length() and copy_private_data(). The numeric suffix is
the number of private data words those functions use for the iterator; the
2B groups are the opcodes which are skipped with an extra IMM2_SIZE. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1316 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1318 pcre_uchar *cc = common->start;
1319 pcre_uchar *alternative;
1320 pcre_uchar *end = NULL;
1321 int private_data_ptr = *private_data_start;
1322 int space, size, bracketlen;
1323 BOOL repeat_check = TRUE;
1325 while (cc < ccend)
1327 space = 0;
1328 size = 0;
1329 bracketlen = 0;
1330 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1331 break;
1333 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1335 if (detect_repeat(common, cc))
1337 /* These brackets are converted to repeats, so no global
1338 based single character repeat is allowed. */
1339 if (cc >= end)
1340 end = bracketend(cc);
1343 repeat_check = TRUE;
1345 switch(*cc)
1347 case OP_KET:
1348 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1350 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1351 private_data_ptr += sizeof(sljit_sw);
1352 cc += common->private_data_ptrs[cc + 1 - common->start];
1354 cc += 1 + LINK_SIZE;
1355 break;
1357 case OP_ASSERT:
1358 case OP_ASSERT_NOT:
1359 case OP_ASSERTBACK:
1360 case OP_ASSERTBACK_NOT:
1361 case OP_ONCE:
1362 case OP_ONCE_NC:
1363 case OP_BRAPOS:
1364 case OP_SBRA:
1365 case OP_SBRAPOS:
1366 case OP_SCOND:
1367 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1368 private_data_ptr += sizeof(sljit_sw);
1369 bracketlen = 1 + LINK_SIZE;
1370 break;
1372 case OP_CBRAPOS:
1373 case OP_SCBRAPOS:
1374 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1375 private_data_ptr += sizeof(sljit_sw);
1376 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1377 break;
1379 case OP_COND:
1380 /* Might be a hidden SCOND. */
1381 alternative = cc + GET(cc, 1);
1382 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1384 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1385 private_data_ptr += sizeof(sljit_sw);
1387 bracketlen = 1 + LINK_SIZE;
1388 break;
1390 case OP_BRA:
1391 bracketlen = 1 + LINK_SIZE;
1392 break;
1394 case OP_CBRA:
1395 case OP_SCBRA:
1396 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1397 break;
1399 case OP_BRAZERO:
1400 case OP_BRAMINZERO:
1401 case OP_BRAPOSZERO:
1402 repeat_check = FALSE;
1403 size = 1;
1404 break;
1406 CASE_ITERATOR_PRIVATE_DATA_1
1407 space = 1;
1408 size = -2;
1409 break;
1411 CASE_ITERATOR_PRIVATE_DATA_2A
1412 space = 2;
1413 size = -2;
1414 break;
1416 CASE_ITERATOR_PRIVATE_DATA_2B
1417 space = 2;
1418 size = -(2 + IMM2_SIZE);
1419 break;
1421 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1422 space = 1;
1423 size = 1;
1424 break;
1426 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1427 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1428 space = 2;
1429 size = 1;
1430 break;
1432 case OP_TYPEUPTO:
1433 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1434 space = 2;
1435 size = 1 + IMM2_SIZE;
1436 break;
1438 case OP_TYPEMINUPTO:
1439 space = 2;
1440 size = 1 + IMM2_SIZE;
1441 break;
1443 case OP_CLASS:
1444 case OP_NCLASS:
1445 space = get_class_iterator_size(cc + size);
1446 size = 1 + 32 / sizeof(pcre_uchar);
1447 break;
1449 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1450 case OP_XCLASS:
1451 space = get_class_iterator_size(cc + size);
1452 size = GET(cc, 1);
1453 break;
1454 #endif
1456 default:
1457 cc = next_opcode(common, cc);
1458 SLJIT_ASSERT(cc != NULL);
1459 break;
1462 /* Character iterators, which are not inside a repeated bracket,
1463 gets a private slot instead of allocating it on the stack. */
1464 if (space > 0 && cc >= end)
1466 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1467 private_data_ptr += sizeof(sljit_sw) * space;
1470 if (size != 0)
1472 if (size < 0)
1474 cc += -size;
1475 #ifdef SUPPORT_UTF
1476 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1477 #endif
1479 else
1480 cc += size;
1483 if (bracketlen > 0)
1485 if (cc >= end)
1487 end = bracketend(cc);
1488 if (end[-1 - LINK_SIZE] == OP_KET)
1489 end = NULL;
1491 cc += bracketlen;
1494 *private_data_start = private_data_ptr;
1497 /* Returns with a frame_types (always < 0) if no need for frame. */
1498 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1500 int length = 0;
1501 int possessive = 0;
1502 BOOL stack_restore = FALSE;
1503 BOOL setsom_found = recursive;
1504 BOOL setmark_found = recursive;
1505 /* The last capture is a local variable even for recursions. */
1506 BOOL capture_last_found = FALSE;
1508 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1509 SLJIT_ASSERT(common->control_head_ptr != 0);
1510 *needs_control_head = TRUE;
1511 #else
1512 *needs_control_head = FALSE;
1513 #endif
1515 if (ccend == NULL)
1517 ccend = bracketend(cc) - (1 + LINK_SIZE);
1518 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1520 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1521 /* This is correct regardless of common->capture_last_ptr. */
1522 capture_last_found = TRUE;
1524 cc = next_opcode(common, cc);
1527 SLJIT_ASSERT(cc != NULL);
1528 while (cc < ccend)
1529 switch(*cc)
1531 case OP_SET_SOM:
1532 SLJIT_ASSERT(common->has_set_som);
1533 stack_restore = TRUE;
1534 if (!setsom_found)
1536 length += 2;
1537 setsom_found = TRUE;
1539 cc += 1;
1540 break;
1542 case OP_MARK:
1543 case OP_PRUNE_ARG:
1544 case OP_THEN_ARG:
1545 SLJIT_ASSERT(common->mark_ptr != 0);
1546 stack_restore = TRUE;
1547 if (!setmark_found)
1549 length += 2;
1550 setmark_found = TRUE;
1552 if (common->control_head_ptr != 0)
1553 *needs_control_head = TRUE;
1554 cc += 1 + 2 + cc[1];
1555 break;
1557 case OP_RECURSE:
1558 stack_restore = TRUE;
1559 if (common->has_set_som && !setsom_found)
1561 length += 2;
1562 setsom_found = TRUE;
1564 if (common->mark_ptr != 0 && !setmark_found)
1566 length += 2;
1567 setmark_found = TRUE;
1569 if (common->capture_last_ptr != 0 && !capture_last_found)
1571 length += 2;
1572 capture_last_found = TRUE;
1574 cc += 1 + LINK_SIZE;
1575 break;
1577 case OP_CBRA:
1578 case OP_CBRAPOS:
1579 case OP_SCBRA:
1580 case OP_SCBRAPOS:
1581 stack_restore = TRUE;
1582 if (common->capture_last_ptr != 0 && !capture_last_found)
1584 length += 2;
1585 capture_last_found = TRUE;
1587 length += 3;
1588 cc += 1 + LINK_SIZE + IMM2_SIZE;
1589 break;
1591 case OP_THEN:
1592 stack_restore = TRUE;
1593 if (common->control_head_ptr != 0)
1594 *needs_control_head = TRUE;
1595 cc ++;
1596 break;
1598 default:
1599 stack_restore = TRUE;
1600 /* Fall through. */
1602 case OP_NOT_WORD_BOUNDARY:
1603 case OP_WORD_BOUNDARY:
1604 case OP_NOT_DIGIT:
1605 case OP_DIGIT:
1606 case OP_NOT_WHITESPACE:
1607 case OP_WHITESPACE:
1608 case OP_NOT_WORDCHAR:
1609 case OP_WORDCHAR:
1610 case OP_ANY:
1611 case OP_ALLANY:
1612 case OP_ANYBYTE:
1613 case OP_NOTPROP:
1614 case OP_PROP:
1615 case OP_ANYNL:
1616 case OP_NOT_HSPACE:
1617 case OP_HSPACE:
1618 case OP_NOT_VSPACE:
1619 case OP_VSPACE:
1620 case OP_EXTUNI:
1621 case OP_EODN:
1622 case OP_EOD:
1623 case OP_CIRC:
1624 case OP_CIRCM:
1625 case OP_DOLL:
1626 case OP_DOLLM:
1627 case OP_CHAR:
1628 case OP_CHARI:
1629 case OP_NOT:
1630 case OP_NOTI:
1632 case OP_EXACT:
1633 case OP_POSSTAR:
1634 case OP_POSPLUS:
1635 case OP_POSQUERY:
1636 case OP_POSUPTO:
1638 case OP_EXACTI:
1639 case OP_POSSTARI:
1640 case OP_POSPLUSI:
1641 case OP_POSQUERYI:
1642 case OP_POSUPTOI:
1644 case OP_NOTEXACT:
1645 case OP_NOTPOSSTAR:
1646 case OP_NOTPOSPLUS:
1647 case OP_NOTPOSQUERY:
1648 case OP_NOTPOSUPTO:
1650 case OP_NOTEXACTI:
1651 case OP_NOTPOSSTARI:
1652 case OP_NOTPOSPLUSI:
1653 case OP_NOTPOSQUERYI:
1654 case OP_NOTPOSUPTOI:
1656 case OP_TYPEEXACT:
1657 case OP_TYPEPOSSTAR:
1658 case OP_TYPEPOSPLUS:
1659 case OP_TYPEPOSQUERY:
1660 case OP_TYPEPOSUPTO:
1662 case OP_CLASS:
1663 case OP_NCLASS:
1664 case OP_XCLASS:
1665 case OP_CALLOUT:
1667 cc = next_opcode(common, cc);
1668 SLJIT_ASSERT(cc != NULL);
1669 break;
1672 /* Possessive quantifiers can use a special case. */
1673 if (SLJIT_UNLIKELY(possessive == length))
1674 return stack_restore ? no_frame : no_stack;
1676 if (length > 0)
1677 return length + 1;
1678 return stack_restore ? no_frame : no_stack;
1681 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1683 DEFINE_COMPILER;
1684 BOOL setsom_found = recursive;
1685 BOOL setmark_found = recursive;
1686 /* The last capture is a local variable even for recursions. */
1687 BOOL capture_last_found = FALSE;
1688 int offset;
1690 /* >= 1 + shortest item size (2) */
1691 SLJIT_UNUSED_ARG(stacktop);
1692 SLJIT_ASSERT(stackpos >= stacktop + 2);
1694 stackpos = STACK(stackpos);
1695 if (ccend == NULL)
1697 ccend = bracketend(cc) - (1 + LINK_SIZE);
1698 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1699 cc = next_opcode(common, cc);
1702 SLJIT_ASSERT(cc != NULL);
1703 while (cc < ccend)
1704 switch(*cc)
1706 case OP_SET_SOM:
1707 SLJIT_ASSERT(common->has_set_som);
1708 if (!setsom_found)
1710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1712 stackpos -= (int)sizeof(sljit_sw);
1713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1714 stackpos -= (int)sizeof(sljit_sw);
1715 setsom_found = TRUE;
1717 cc += 1;
1718 break;
1720 case OP_MARK:
1721 case OP_PRUNE_ARG:
1722 case OP_THEN_ARG:
1723 SLJIT_ASSERT(common->mark_ptr != 0);
1724 if (!setmark_found)
1726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1728 stackpos -= (int)sizeof(sljit_sw);
1729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1730 stackpos -= (int)sizeof(sljit_sw);
1731 setmark_found = TRUE;
1733 cc += 1 + 2 + cc[1];
1734 break;
1736 case OP_RECURSE:
1737 if (common->has_set_som && !setsom_found)
1739 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1741 stackpos -= (int)sizeof(sljit_sw);
1742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1743 stackpos -= (int)sizeof(sljit_sw);
1744 setsom_found = TRUE;
1746 if (common->mark_ptr != 0 && !setmark_found)
1748 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1750 stackpos -= (int)sizeof(sljit_sw);
1751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1752 stackpos -= (int)sizeof(sljit_sw);
1753 setmark_found = TRUE;
1755 if (common->capture_last_ptr != 0 && !capture_last_found)
1757 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1758 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1759 stackpos -= (int)sizeof(sljit_sw);
1760 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1761 stackpos -= (int)sizeof(sljit_sw);
1762 capture_last_found = TRUE;
1764 cc += 1 + LINK_SIZE;
1765 break;
1767 case OP_CBRA:
1768 case OP_CBRAPOS:
1769 case OP_SCBRA:
1770 case OP_SCBRAPOS:
1771 if (common->capture_last_ptr != 0 && !capture_last_found)
1773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1774 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1775 stackpos -= (int)sizeof(sljit_sw);
1776 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1777 stackpos -= (int)sizeof(sljit_sw);
1778 capture_last_found = TRUE;
1780 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1781 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1782 stackpos -= (int)sizeof(sljit_sw);
1783 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1784 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1785 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1786 stackpos -= (int)sizeof(sljit_sw);
1787 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1788 stackpos -= (int)sizeof(sljit_sw);
1790 cc += 1 + LINK_SIZE + IMM2_SIZE;
1791 break;
1793 default:
1794 cc = next_opcode(common, cc);
1795 SLJIT_ASSERT(cc != NULL);
1796 break;
1799 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1800 SLJIT_ASSERT(stackpos == STACK(stacktop));
1803 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1805 int private_data_length = needs_control_head ? 3 : 2;
1806 int size;
1807 pcre_uchar *alternative;
1808 /* Calculate the sum of the private machine words. */
1809 while (cc < ccend)
1811 size = 0;
1812 switch(*cc)
1814 case OP_KET:
1815 if (PRIVATE_DATA(cc) != 0)
1817 private_data_length++;
1818 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1819 cc += PRIVATE_DATA(cc + 1);
1821 cc += 1 + LINK_SIZE;
1822 break;
1824 case OP_ASSERT:
1825 case OP_ASSERT_NOT:
1826 case OP_ASSERTBACK:
1827 case OP_ASSERTBACK_NOT:
1828 case OP_ONCE:
1829 case OP_ONCE_NC:
1830 case OP_BRAPOS:
1831 case OP_SBRA:
1832 case OP_SBRAPOS:
1833 case OP_SCOND:
1834 private_data_length++;
1835 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1836 cc += 1 + LINK_SIZE;
1837 break;
1839 case OP_CBRA:
1840 case OP_SCBRA:
1841 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1842 private_data_length++;
1843 cc += 1 + LINK_SIZE + IMM2_SIZE;
1844 break;
1846 case OP_CBRAPOS:
1847 case OP_SCBRAPOS:
1848 private_data_length += 2;
1849 cc += 1 + LINK_SIZE + IMM2_SIZE;
1850 break;
1852 case OP_COND:
1853 /* Might be a hidden SCOND. */
1854 alternative = cc + GET(cc, 1);
1855 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1856 private_data_length++;
1857 cc += 1 + LINK_SIZE;
1858 break;
1860 CASE_ITERATOR_PRIVATE_DATA_1
1861 if (PRIVATE_DATA(cc))
1862 private_data_length++;
1863 cc += 2;
1864 #ifdef SUPPORT_UTF
1865 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1866 #endif
1867 break;
1869 CASE_ITERATOR_PRIVATE_DATA_2A
1870 if (PRIVATE_DATA(cc))
1871 private_data_length += 2;
1872 cc += 2;
1873 #ifdef SUPPORT_UTF
1874 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1875 #endif
1876 break;
1878 CASE_ITERATOR_PRIVATE_DATA_2B
1879 if (PRIVATE_DATA(cc))
1880 private_data_length += 2;
1881 cc += 2 + IMM2_SIZE;
1882 #ifdef SUPPORT_UTF
1883 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1884 #endif
1885 break;
1887 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1888 if (PRIVATE_DATA(cc))
1889 private_data_length++;
1890 cc += 1;
1891 break;
1893 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1894 if (PRIVATE_DATA(cc))
1895 private_data_length += 2;
1896 cc += 1;
1897 break;
1899 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1900 if (PRIVATE_DATA(cc))
1901 private_data_length += 2;
1902 cc += 1 + IMM2_SIZE;
1903 break;
1905 case OP_CLASS:
1906 case OP_NCLASS:
1907 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1908 case OP_XCLASS:
1909 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1910 #else
1911 size = 1 + 32 / (int)sizeof(pcre_uchar);
1912 #endif
1913 if (PRIVATE_DATA(cc))
1914 private_data_length += get_class_iterator_size(cc + size);
1915 cc += size;
1916 break;
1918 default:
1919 cc = next_opcode(common, cc);
1920 SLJIT_ASSERT(cc != NULL);
1921 break;
1924 SLJIT_ASSERT(cc == ccend);
1925 return private_data_length;
1928 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1929 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1931 DEFINE_COMPILER;
1932 int srcw[2];
1933 int count, size;
1934 BOOL tmp1next = TRUE;
1935 BOOL tmp1empty = TRUE;
1936 BOOL tmp2empty = TRUE;
1937 pcre_uchar *alternative;
1938 enum {
1939 loop,
1941 } status;
1943 status = loop;
1944 stackptr = STACK(stackptr);
1945 stacktop = STACK(stacktop - 1);
1947 if (!save)
1949 stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1950 if (stackptr < stacktop)
1952 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1953 stackptr += sizeof(sljit_sw);
1954 tmp1empty = FALSE;
1956 if (stackptr < stacktop)
1958 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1959 stackptr += sizeof(sljit_sw);
1960 tmp2empty = FALSE;
1962 /* The tmp1next must be TRUE in either way. */
1965 SLJIT_ASSERT(common->recursive_head_ptr != 0);
1969 count = 0;
1970 if (cc >= ccend)
1972 if (!save)
1973 break;
1975 count = 1;
1976 srcw[0] = common->recursive_head_ptr;
1977 if (needs_control_head)
1979 SLJIT_ASSERT(common->control_head_ptr != 0);
1980 count = 2;
1981 srcw[0] = common->control_head_ptr;
1982 srcw[1] = common->recursive_head_ptr;
1984 status = end;
1986 else switch(*cc)
1988 case OP_KET:
1989 if (PRIVATE_DATA(cc) != 0)
1991 count = 1;
1992 srcw[0] = PRIVATE_DATA(cc);
1993 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1994 cc += PRIVATE_DATA(cc + 1);
1996 cc += 1 + LINK_SIZE;
1997 break;
1999 case OP_ASSERT:
2000 case OP_ASSERT_NOT:
2001 case OP_ASSERTBACK:
2002 case OP_ASSERTBACK_NOT:
2003 case OP_ONCE:
2004 case OP_ONCE_NC:
2005 case OP_BRAPOS:
2006 case OP_SBRA:
2007 case OP_SBRAPOS:
2008 case OP_SCOND:
2009 count = 1;
2010 srcw[0] = PRIVATE_DATA(cc);
2011 SLJIT_ASSERT(srcw[0] != 0);
2012 cc += 1 + LINK_SIZE;
2013 break;
2015 case OP_CBRA:
2016 case OP_SCBRA:
2017 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2019 count = 1;
2020 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2022 cc += 1 + LINK_SIZE + IMM2_SIZE;
2023 break;
2025 case OP_CBRAPOS:
2026 case OP_SCBRAPOS:
2027 count = 2;
2028 srcw[0] = PRIVATE_DATA(cc);
2029 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2030 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
2031 cc += 1 + LINK_SIZE + IMM2_SIZE;
2032 break;
2034 case OP_COND:
2035 /* Might be a hidden SCOND. */
2036 alternative = cc + GET(cc, 1);
2037 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2039 count = 1;
2040 srcw[0] = PRIVATE_DATA(cc);
2041 SLJIT_ASSERT(srcw[0] != 0);
2043 cc += 1 + LINK_SIZE;
2044 break;
2046 CASE_ITERATOR_PRIVATE_DATA_1
2047 if (PRIVATE_DATA(cc))
2049 count = 1;
2050 srcw[0] = PRIVATE_DATA(cc);
2052 cc += 2;
2053 #ifdef SUPPORT_UTF
2054 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2055 #endif
2056 break;
2058 CASE_ITERATOR_PRIVATE_DATA_2A
2059 if (PRIVATE_DATA(cc))
2061 count = 2;
2062 srcw[0] = PRIVATE_DATA(cc);
2063 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2065 cc += 2;
2066 #ifdef SUPPORT_UTF
2067 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2068 #endif
2069 break;
2071 CASE_ITERATOR_PRIVATE_DATA_2B
2072 if (PRIVATE_DATA(cc))
2074 count = 2;
2075 srcw[0] = PRIVATE_DATA(cc);
2076 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2078 cc += 2 + IMM2_SIZE;
2079 #ifdef SUPPORT_UTF
2080 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2081 #endif
2082 break;
2084 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2085 if (PRIVATE_DATA(cc))
2087 count = 1;
2088 srcw[0] = PRIVATE_DATA(cc);
2090 cc += 1;
2091 break;
2093 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2094 if (PRIVATE_DATA(cc))
2096 count = 2;
2097 srcw[0] = PRIVATE_DATA(cc);
2098 srcw[1] = srcw[0] + sizeof(sljit_sw);
2100 cc += 1;
2101 break;
2103 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2104 if (PRIVATE_DATA(cc))
2106 count = 2;
2107 srcw[0] = PRIVATE_DATA(cc);
2108 srcw[1] = srcw[0] + sizeof(sljit_sw);
2110 cc += 1 + IMM2_SIZE;
2111 break;
2113 case OP_CLASS:
2114 case OP_NCLASS:
2115 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2116 case OP_XCLASS:
2117 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2118 #else
2119 size = 1 + 32 / (int)sizeof(pcre_uchar);
2120 #endif
2121 if (PRIVATE_DATA(cc))
2122 switch(get_class_iterator_size(cc + size))
2124 case 1:
2125 count = 1;
2126 srcw[0] = PRIVATE_DATA(cc);
2127 break;
2129 case 2:
2130 count = 2;
2131 srcw[0] = PRIVATE_DATA(cc);
2132 srcw[1] = srcw[0] + sizeof(sljit_sw);
2133 break;
2135 default:
2136 SLJIT_UNREACHABLE();
2137 break;
2139 cc += size;
2140 break;
2142 default:
2143 cc = next_opcode(common, cc);
2144 SLJIT_ASSERT(cc != NULL);
2145 break;
2148 while (count > 0)
2150 count--;
2151 if (save)
2153 if (tmp1next)
2155 if (!tmp1empty)
2157 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2158 stackptr += sizeof(sljit_sw);
2160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2161 tmp1empty = FALSE;
2162 tmp1next = FALSE;
2164 else
2166 if (!tmp2empty)
2168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2169 stackptr += sizeof(sljit_sw);
2171 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2172 tmp2empty = FALSE;
2173 tmp1next = TRUE;
2176 else
2178 if (tmp1next)
2180 SLJIT_ASSERT(!tmp1empty);
2181 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2182 tmp1empty = stackptr >= stacktop;
2183 if (!tmp1empty)
2185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2186 stackptr += sizeof(sljit_sw);
2188 tmp1next = FALSE;
2190 else
2192 SLJIT_ASSERT(!tmp2empty);
2193 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2194 tmp2empty = stackptr >= stacktop;
2195 if (!tmp2empty)
2197 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2198 stackptr += sizeof(sljit_sw);
2200 tmp1next = TRUE;
2205 while (status != end);
2207 if (save)
2209 if (tmp1next)
2211 if (!tmp1empty)
2213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2214 stackptr += sizeof(sljit_sw);
2216 if (!tmp2empty)
2218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2219 stackptr += sizeof(sljit_sw);
2222 else
2224 if (!tmp2empty)
2226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2227 stackptr += sizeof(sljit_sw);
2229 if (!tmp1empty)
2231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2232 stackptr += sizeof(sljit_sw);
2236 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2239 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2241 pcre_uchar *end = bracketend(cc);
2242 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2244 /* Assert captures then. */
2245 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2246 current_offset = NULL;
2247 /* Conditional block does not. */
2248 if (*cc == OP_COND || *cc == OP_SCOND)
2249 has_alternatives = FALSE;
2251 cc = next_opcode(common, cc);
2252 if (has_alternatives)
2253 current_offset = common->then_offsets + (cc - common->start);
2255 while (cc < end)
2257 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2258 cc = set_then_offsets(common, cc, current_offset);
2259 else
2261 if (*cc == OP_ALT && has_alternatives)
2262 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2263 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2264 *current_offset = 1;
2265 cc = next_opcode(common, cc);
2269 return end;
/* The case label groups are not used past this point. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2279 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2281 return (value & (value - 1)) == 0;
2284 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2286 while (list)
2288 /* sljit_set_label is clever enough to do nothing
2289 if either the jump or the label is NULL. */
2290 SET_LABEL(list->jump, label);
2291 list = list->next;
2295 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2297 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2298 if (list_item)
2300 list_item->next = *list;
2301 list_item->jump = jump;
2302 *list = list_item;
2306 static void add_stub(compiler_common *common, struct sljit_jump *start)
2308 DEFINE_COMPILER;
2309 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2311 if (list_item)
2313 list_item->start = start;
2314 list_item->quit = LABEL();
2315 list_item->next = common->stubs;
2316 common->stubs = list_item;
2320 static void flush_stubs(compiler_common *common)
2322 DEFINE_COMPILER;
2323 stub_list *list_item = common->stubs;
2325 while (list_item)
2327 JUMPHERE(list_item->start);
2328 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2329 JUMPTO(SLJIT_JUMP, list_item->quit);
2330 list_item = list_item->next;
2332 common->stubs = NULL;
2335 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2337 DEFINE_COMPILER;
2338 label_addr_list *label_addr;
2340 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2341 if (label_addr == NULL)
2342 return;
2343 label_addr->label = LABEL();
2344 label_addr->update_addr = update_addr;
2345 label_addr->next = common->label_addrs;
2346 common->label_addrs = label_addr;
2349 static SLJIT_INLINE void count_match(compiler_common *common)
2351 DEFINE_COMPILER;
2353 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2354 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2357 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2359 /* May destroy all locals and registers except TMP2. */
2360 DEFINE_COMPILER;
2362 SLJIT_ASSERT(size > 0);
2363 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2364 #ifdef DESTROY_REGISTERS
2365 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2366 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2367 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2368 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2369 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2370 #endif
2371 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2374 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2376 DEFINE_COMPILER;
2378 SLJIT_ASSERT(size > 0);
2379 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2382 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2384 DEFINE_COMPILER;
2385 sljit_uw *result;
2387 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2388 return NULL;
2390 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2391 if (SLJIT_UNLIKELY(result == NULL))
2393 sljit_set_compiler_memory_error(compiler);
2394 return NULL;
2397 *(void**)result = common->read_only_data_head;
2398 common->read_only_data_head = (void *)result;
2399 return result + 1;
2402 static void free_read_only_data(void *current, void *allocator_data)
2404 void *next;
2406 SLJIT_UNUSED_ARG(allocator_data);
2408 while (current != NULL)
2410 next = *(void**)current;
2411 SLJIT_FREE(current, allocator_data);
2412 current = next;
2416 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2418 DEFINE_COMPILER;
2419 struct sljit_label *loop;
2420 int i;
2422 /* At this point we can freely use all temporary registers. */
2423 SLJIT_ASSERT(length > 1);
2424 /* TMP1 returns with begin - 1. */
2425 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2426 if (length < 8)
2428 for (i = 1; i < length; i++)
2429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2431 else
2433 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2435 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2436 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2437 loop = LABEL();
2438 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
2439 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2440 JUMPTO(SLJIT_NOT_ZERO, loop);
2442 else
2444 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
2445 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2446 loop = LABEL();
2447 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
2448 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
2449 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2450 JUMPTO(SLJIT_NOT_ZERO, loop);
2455 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2457 DEFINE_COMPILER;
2458 sljit_s32 i;
2460 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2462 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2463 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2467 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2469 DEFINE_COMPILER;
2470 struct sljit_label *loop;
2471 int i;
2473 SLJIT_ASSERT(length > 1);
2474 /* OVECTOR(1) contains the "string begin - 1" constant. */
2475 if (length > 2)
2476 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2477 if (length < 8)
2479 for (i = 2; i < length; i++)
2480 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2482 else
2484 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2486 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2487 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2488 loop = LABEL();
2489 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
2490 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2491 JUMPTO(SLJIT_NOT_ZERO, loop);
2493 else
2495 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
2496 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2497 loop = LABEL();
2498 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
2499 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
2500 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2501 JUMPTO(SLJIT_NOT_ZERO, loop);
2505 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2506 if (common->mark_ptr != 0)
2507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2508 if (common->control_head_ptr != 0)
2509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2510 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2511 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2512 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
2515 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2517 while (current != NULL)
2519 switch (current[1])
2521 case type_then_trap:
2522 break;
2524 case type_mark:
2525 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2526 return current[3];
2527 break;
2529 default:
2530 SLJIT_UNREACHABLE();
2531 break;
2533 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2534 current = (sljit_sw*)current[0];
2536 return 0;
2539 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2541 DEFINE_COMPILER;
2542 struct sljit_label *loop;
2543 struct sljit_jump *early_quit;
2544 BOOL has_pre;
2546 /* At this point we can freely use all registers. */
2547 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2550 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2551 if (common->mark_ptr != 0)
2552 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2553 OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2554 if (common->mark_ptr != 0)
2555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2556 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2557 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2559 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
2560 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
2562 /* Unlikely, but possible */
2563 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2564 loop = LABEL();
2566 if (has_pre)
2567 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
2568 else
2570 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
2571 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2574 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
2575 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
2576 /* Copy the integer value to the output buffer */
2577 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2578 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2579 #endif
2581 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
2582 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2583 JUMPTO(SLJIT_NOT_ZERO, loop);
2584 JUMPHERE(early_quit);
2586 /* Calculate the return value, which is the maximum ovector value. */
2587 if (topbracket > 1)
2589 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
2591 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2592 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2594 /* OVECTOR(0) is never equal to SLJIT_S2. */
2595 loop = LABEL();
2596 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2597 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2598 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2600 else
2602 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
2603 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2605 /* OVECTOR(0) is never equal to SLJIT_S2. */
2606 loop = LABEL();
2607 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
2608 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
2609 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2610 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2612 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2614 else
2615 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2618 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2620 DEFINE_COMPILER;
2621 struct sljit_jump *jump;
2623 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2624 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2625 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2627 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2628 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2629 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2630 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2632 /* Store match begin and end. */
2633 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2634 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2636 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2637 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2638 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2639 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2640 #endif
2641 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2642 JUMPHERE(jump);
2644 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2645 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2646 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2647 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2648 #endif
2649 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2651 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2652 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2653 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2654 #endif
2655 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2657 JUMPTO(SLJIT_JUMP, quit);
2660 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2662 /* May destroy TMP1. */
2663 DEFINE_COMPILER;
2664 struct sljit_jump *jump;
2666 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2668 /* The value of -1 must be kept for start_used_ptr! */
2669 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2670 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2671 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2672 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2674 JUMPHERE(jump);
2676 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2678 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2679 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2680 JUMPHERE(jump);
2684 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2686 /* Detects if the character has an othercase. */
2687 unsigned int c;
2689 #ifdef SUPPORT_UTF
2690 if (common->utf)
2692 GETCHAR(c, cc);
2693 if (c > 127)
2695 #ifdef SUPPORT_UCP
2696 return c != UCD_OTHERCASE(c);
2697 #else
2698 return FALSE;
2699 #endif
2701 #ifndef COMPILE_PCRE8
2702 return common->fcc[c] != c;
2703 #endif
2705 else
2706 #endif
2707 c = *cc;
2708 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2711 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2713 /* Returns with the othercase. */
2714 #ifdef SUPPORT_UTF
2715 if (common->utf && c > 127)
2717 #ifdef SUPPORT_UCP
2718 return UCD_OTHERCASE(c);
2719 #else
2720 return c;
2721 #endif
2723 #endif
2724 return TABLE_GET(c, common->fcc, c);
2727 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2729 /* Detects if the character and its othercase has only 1 bit difference. */
2730 unsigned int c, oc, bit;
2731 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2732 int n;
2733 #endif
2735 #ifdef SUPPORT_UTF
2736 if (common->utf)
2738 GETCHAR(c, cc);
2739 if (c <= 127)
2740 oc = common->fcc[c];
2741 else
2743 #ifdef SUPPORT_UCP
2744 oc = UCD_OTHERCASE(c);
2745 #else
2746 oc = c;
2747 #endif
2750 else
2752 c = *cc;
2753 oc = TABLE_GET(c, common->fcc, c);
2755 #else
2756 c = *cc;
2757 oc = TABLE_GET(c, common->fcc, c);
2758 #endif
2760 SLJIT_ASSERT(c != oc);
2762 bit = c ^ oc;
2763 /* Optimized for English alphabet. */
2764 if (c <= 127 && bit == 0x20)
2765 return (0 << 8) | 0x20;
2767 /* Since c != oc, they must have at least 1 bit difference. */
2768 if (!is_powerof2(bit))
2769 return 0;
2771 #if defined COMPILE_PCRE8
2773 #ifdef SUPPORT_UTF
2774 if (common->utf && c > 127)
2776 n = GET_EXTRALEN(*cc);
2777 while ((bit & 0x3f) == 0)
2779 n--;
2780 bit >>= 6;
2782 return (n << 8) | bit;
2784 #endif /* SUPPORT_UTF */
2785 return (0 << 8) | bit;
2787 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2789 #ifdef SUPPORT_UTF
2790 if (common->utf && c > 65535)
2792 if (bit >= (1 << 10))
2793 bit >>= 10;
2794 else
2795 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2797 #endif /* SUPPORT_UTF */
2798 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2800 #endif /* COMPILE_PCRE[8|16|32] */
2803 static void check_partial(compiler_common *common, BOOL force)
2805 /* Checks whether a partial matching is occurred. Does not modify registers. */
2806 DEFINE_COMPILER;
2807 struct sljit_jump *jump = NULL;
2809 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2811 if (common->mode == JIT_COMPILE)
2812 return;
2814 if (!force)
2815 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2816 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2817 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2819 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2821 else
2823 if (common->partialmatchlabel != NULL)
2824 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2825 else
2826 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2829 if (jump != NULL)
2830 JUMPHERE(jump);
2833 static void check_str_end(compiler_common *common, jump_list **end_reached)
2835 /* Does not affect registers. Usually used in a tight spot. */
2836 DEFINE_COMPILER;
2837 struct sljit_jump *jump;
2839 if (common->mode == JIT_COMPILE)
2841 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2842 return;
2845 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2846 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2848 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2849 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2850 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2852 else
2854 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2855 if (common->partialmatchlabel != NULL)
2856 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2857 else
2858 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2860 JUMPHERE(jump);
2863 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2865 DEFINE_COMPILER;
2866 struct sljit_jump *jump;
2868 if (common->mode == JIT_COMPILE)
2870 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2871 return;
2874 /* Partial matching mode. */
2875 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2876 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2877 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2879 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2880 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2882 else
2884 if (common->partialmatchlabel != NULL)
2885 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2886 else
2887 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2889 JUMPHERE(jump);
2892 static void peek_char(compiler_common *common, sljit_u32 max)
2894 /* Reads the character into TMP1, keeps STR_PTR.
2895 Does not check STR_END. TMP2 Destroyed. */
2896 DEFINE_COMPILER;
2897 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2898 struct sljit_jump *jump;
2899 #endif
2901 SLJIT_UNUSED_ARG(max);
2903 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2904 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2905 if (common->utf)
2907 if (max < 128) return;
2909 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2910 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2911 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2912 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2913 JUMPHERE(jump);
2915 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2917 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2918 if (common->utf)
2920 if (max < 0xd800) return;
2922 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2923 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2924 /* TMP2 contains the high surrogate. */
2925 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2926 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2927 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2928 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2929 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2930 JUMPHERE(jump);
2932 #endif
2935 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2937 static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2939 /* Tells whether the character codes below 128 are enough
2940 to determine a match. */
2941 const sljit_u8 value = nclass ? 0xff : 0;
2942 const sljit_u8 *end = bitset + 32;
2944 bitset += 16;
2947 if (*bitset++ != value)
2948 return FALSE;
2950 while (bitset < end);
2951 return TRUE;
2954 static void read_char7_type(compiler_common *common, BOOL full_read)
2956 /* Reads the precise character type of a character into TMP1, if the character
2957 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2958 full_read argument tells whether characters above max are accepted or not. */
2959 DEFINE_COMPILER;
2960 struct sljit_jump *jump;
2962 SLJIT_ASSERT(common->utf);
2964 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2965 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2967 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2969 if (full_read)
2971 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2972 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2974 JUMPHERE(jump);
2978 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2980 static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2982 /* Reads the precise value of a character into TMP1, if the character is
2983 between min and max (c >= min && c <= max). Otherwise it returns with a value
2984 outside the range. Does not check STR_END. */
2985 DEFINE_COMPILER;
2986 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2987 struct sljit_jump *jump;
2988 #endif
2989 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2990 struct sljit_jump *jump2;
2991 #endif
2993 SLJIT_UNUSED_ARG(update_str_ptr);
2994 SLJIT_UNUSED_ARG(min);
2995 SLJIT_UNUSED_ARG(max);
2996 SLJIT_ASSERT(min <= max);
2998 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2999 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3001 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3002 if (common->utf)
3004 if (max < 128 && !update_str_ptr) return;
3006 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3007 if (min >= 0x10000)
3009 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3010 if (update_str_ptr)
3011 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3012 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3013 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3014 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3015 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3016 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3017 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3018 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3019 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3020 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3021 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3022 if (!update_str_ptr)
3023 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3024 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3025 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3026 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3027 JUMPHERE(jump2);
3028 if (update_str_ptr)
3029 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3031 else if (min >= 0x800 && max <= 0xffff)
3033 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3034 if (update_str_ptr)
3035 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3036 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3037 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3038 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3039 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3040 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3041 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3042 if (!update_str_ptr)
3043 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3044 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3045 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3046 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3047 JUMPHERE(jump2);
3048 if (update_str_ptr)
3049 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3051 else if (max >= 0x800)
3052 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3053 else if (max < 128)
3055 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3056 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3058 else
3060 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3061 if (!update_str_ptr)
3062 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3063 else
3064 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3065 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3066 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3067 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3068 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3069 if (update_str_ptr)
3070 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3072 JUMPHERE(jump);
3074 #endif
3076 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3077 if (common->utf)
3079 if (max >= 0x10000)
3081 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3082 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3083 /* TMP2 contains the high surrogate. */
3084 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3085 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
3086 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3087 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3088 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
3089 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3090 JUMPHERE(jump);
3091 return;
3094 if (max < 0xd800 && !update_str_ptr) return;
3096 /* Skip low surrogate if necessary. */
3097 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3098 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3099 if (update_str_ptr)
3100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3101 if (max >= 0xd800)
3102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3103 JUMPHERE(jump);
3105 #endif
3108 static SLJIT_INLINE void read_char(compiler_common *common)
3110 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3113 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3115 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3116 DEFINE_COMPILER;
3117 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3118 struct sljit_jump *jump;
3119 #endif
3120 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3121 struct sljit_jump *jump2;
3122 #endif
3124 SLJIT_UNUSED_ARG(update_str_ptr);
3126 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3127 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3129 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3130 if (common->utf)
3132 /* This can be an extra read in some situations, but hopefully
3133 it is needed in most cases. */
3134 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3135 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3136 if (!update_str_ptr)
3138 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3139 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3140 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3141 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3142 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3143 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3145 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3146 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3147 JUMPHERE(jump2);
3149 else
3150 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3151 JUMPHERE(jump);
3152 return;
3154 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3156 #if !defined COMPILE_PCRE8
3157 /* The ctypes array contains only 256 values. */
3158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3159 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3160 #endif
3161 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3162 #if !defined COMPILE_PCRE8
3163 JUMPHERE(jump);
3164 #endif
3166 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3167 if (common->utf && update_str_ptr)
3169 /* Skip low surrogate if necessary. */
3170 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3171 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3172 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3173 JUMPHERE(jump);
3175 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3178 static void skip_char_back(compiler_common *common)
3180 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3181 DEFINE_COMPILER;
3182 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3183 #if defined COMPILE_PCRE8
3184 struct sljit_label *label;
3186 if (common->utf)
3188 label = LABEL();
3189 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3190 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3191 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3192 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3193 return;
3195 #elif defined COMPILE_PCRE16
3196 if (common->utf)
3198 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3199 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3200 /* Skip low surrogate if necessary. */
3201 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3202 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3203 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3204 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3205 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3206 return;
3208 #endif /* COMPILE_PCRE[8|16] */
3209 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3210 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3213 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3215 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3216 DEFINE_COMPILER;
3217 struct sljit_jump *jump;
3219 if (nltype == NLTYPE_ANY)
3221 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3222 sljit_set_current_flags(compiler, SLJIT_SET_Z);
3223 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3225 else if (nltype == NLTYPE_ANYCRLF)
3227 if (jumpifmatch)
3229 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3230 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3232 else
3234 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3235 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3236 JUMPHERE(jump);
3239 else
3241 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3242 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3246 #ifdef SUPPORT_UTF
3248 #if defined COMPILE_PCRE8
3249 static void do_utfreadchar(compiler_common *common)
3251 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3252 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3253 DEFINE_COMPILER;
3254 struct sljit_jump *jump;
3256 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3257 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3258 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3259 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3260 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3261 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3263 /* Searching for the first zero. */
3264 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3265 jump = JUMP(SLJIT_NOT_ZERO);
3266 /* Two byte sequence. */
3267 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3268 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3269 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3271 JUMPHERE(jump);
3272 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3273 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3274 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3275 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3276 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3278 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3279 jump = JUMP(SLJIT_NOT_ZERO);
3280 /* Three byte sequence. */
3281 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3282 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3283 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3285 /* Four byte sequence. */
3286 JUMPHERE(jump);
3287 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3288 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3289 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3290 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3291 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3292 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3294 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3297 static void do_utfreadchar16(compiler_common *common)
3299 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3300 of the character (>= 0xc0). Return value in TMP1. */
3301 DEFINE_COMPILER;
3302 struct sljit_jump *jump;
3304 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3305 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3306 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3307 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3308 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3309 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3311 /* Searching for the first zero. */
3312 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3313 jump = JUMP(SLJIT_NOT_ZERO);
3314 /* Two byte sequence. */
3315 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3316 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3318 JUMPHERE(jump);
3319 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3320 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
3321 /* This code runs only in 8 bit mode. No need to shift the value. */
3322 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3323 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3324 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3325 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3326 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3327 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3328 /* Three byte sequence. */
3329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3330 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3333 static void do_utfreadtype8(compiler_common *common)
3335 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3336 of the character (>= 0xc0). Return value in TMP1. */
3337 DEFINE_COMPILER;
3338 struct sljit_jump *jump;
3339 struct sljit_jump *compare;
3341 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3343 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3344 jump = JUMP(SLJIT_NOT_ZERO);
3345 /* Two byte sequence. */
3346 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3347 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3348 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3349 /* The upper 5 bits are known at this point. */
3350 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3351 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3352 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3353 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3354 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3355 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3357 JUMPHERE(compare);
3358 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3359 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3361 /* We only have types for characters less than 256. */
3362 JUMPHERE(jump);
3363 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3364 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3365 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3366 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3369 #endif /* COMPILE_PCRE8 */
3371 #endif /* SUPPORT_UTF */
3373 #ifdef SUPPORT_UCP
3375 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3376 #define UCD_BLOCK_MASK 127
3377 #define UCD_BLOCK_SHIFT 7
3379 static void do_getucd(compiler_common *common)
3381 /* Search the UCD record for the character comes in TMP1.
3382 Returns chartype in TMP1 and UCD offset in TMP2. */
3383 DEFINE_COMPILER;
3384 #ifdef COMPILE_PCRE32
3385 struct sljit_jump *jump;
3386 #endif
3388 #if defined SLJIT_DEBUG && SLJIT_DEBUG
3389 /* dummy_ucd_record */
3390 const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
3391 SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
3392 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
3393 #endif
3395 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3397 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3399 #ifdef COMPILE_PCRE32
3400 if (!common->utf)
3402 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
3403 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
3404 JUMPHERE(jump);
3406 #endif
3408 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3409 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3410 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3411 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3412 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3413 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3414 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3415 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3416 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3417 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3419 #endif
/* Emits the code executed before each match attempt and returns the label
(mainloop) of the code that advances STR_PTR to the next start position.
The very first entry jumps over the advancing code (see `start`).
hascrorlf: when TRUE, the combined two unit newline skip is not emitted. */
3421 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3423 DEFINE_COMPILER;
3424 struct sljit_label *mainloop;
3425 struct sljit_label *newlinelabel = NULL;
3426 struct sljit_jump *start;
3427 struct sljit_jump *end = NULL;
3428 struct sljit_jump *end2 = NULL;
3429 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3430 struct sljit_jump *singlechar;
3431 #endif
3432 jump_list *newline = NULL;
3433 BOOL newlinecheck = FALSE;
3434 BOOL readuchar = FALSE;
/* The two unit newline skip is emitted only when no first line limit is
tracked (match_end_ptr == 0), hascrorlf is FALSE, and the newline type is
"any", "anycrlf" or a fixed value above 255 (compared below as two units:
(newline >> 8) & 0xff followed by newline & 0xff). */
3436 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3437 (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3438 newlinecheck = TRUE;
3440 if (common->match_end_ptr != 0)
3442 /* Search for the end of the first line. */
/* STR_PTR is saved in TMP3 during the scan and restored afterwards. The
position found is stored in the stack slot at common->match_end_ptr. */
3443 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3445 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Fixed two unit newline: compare the previous and the current unit. */
3447 mainloop = LABEL();
3448 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3449 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3450 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3451 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3452 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3453 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3454 JUMPHERE(end);
3455 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3457 else
3459 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3460 mainloop = LABEL();
3461 /* Continual stores do not cause data dependency. */
3462 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3463 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3464 check_newlinechar(common, common->nltype, &newline, TRUE);
3465 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3466 JUMPHERE(end);
3467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3468 set_jumps(newline, LABEL());
3471 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
/* The first entry skips the advancing code: this jump lands on the
JUMPHERE(start) at the end of the function. */
3474 start = JUMP(SLJIT_JUMP);
3476 if (newlinecheck)
/* Reached from the main loop when the current unit equals the first
newline unit. That unit is skipped, and the following unit is skipped as
well when it equals the second newline unit (TMP1 becomes 0 or 1 and is
converted to a byte offset). */
3478 newlinelabel = LABEL();
3479 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3480 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3481 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3482 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3483 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3484 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3485 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3486 #endif
3487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3488 end2 = JUMP(SLJIT_JUMP);
/* This label is the return value of the function. */
3491 mainloop = LABEL();
3493 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is loaded only when something below inspects it: the
UTF length computation or the newline check. */
3494 #ifdef SUPPORT_UTF
3495 if (common->utf) readuchar = TRUE;
3496 #endif
3497 if (newlinecheck) readuchar = TRUE;
3499 if (readuchar)
3500 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3502 if (newlinecheck)
3503 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3505 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3506 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3507 #if defined COMPILE_PCRE8
3508 if (common->utf)
/* Lead bytes (>= 0xc0): also skip the extra length read from utf8_table4. */
3510 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3511 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3512 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3513 JUMPHERE(singlechar);
3515 #elif defined COMPILE_PCRE16
3516 if (common->utf)
/* Units whose top six bits equal 0xd800 are followed by one more unit:
the 0/1 flag is shifted left by one to give a byte offset of 0 or 2. */
3518 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3519 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3520 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3521 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3522 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3523 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3524 JUMPHERE(singlechar);
3526 #endif /* COMPILE_PCRE[8|16] */
3527 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3528 JUMPHERE(start);
3530 if (newlinecheck)
/* Both exits of the newline skip code continue here. */
3532 JUMPHERE(end);
3533 JUMPHERE(end2);
3536 return mainloop;
3539 #define MAX_N_CHARS 16
3540 #define MAX_DIFF_CHARS 6
3542 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3544 pcre_uchar i, len;
3546 len = chars[0];
3547 if (len == 255)
3548 return;
3550 if (len == 0)
3552 chars[0] = 1;
3553 chars[1] = chr;
3554 return;
3557 for (i = len; i > 0; i--)
3558 if (chars[i] == chr)
3559 return;
3561 if (len >= MAX_DIFF_CHARS - 1)
3563 chars[0] = 255;
3564 return;
3567 len++;
3568 chars[len] = chr;
3569 chars[0] = len;
/* Collects, for the leading character positions of the pattern at cc, the
characters that may appear at each position.
chars: MAX_DIFF_CHARS entries per position; entry 0 is the number of stored
  characters (255 = any character), see add_prefix_char.
max_chars: number of positions that may still be filled in.
rec_count: shared work budget, decremented on every loop iteration.
Returns the number of positions consumed, or 0 when *rec_count runs out.
Recursive calls use the returned value as the new max_chars limit. */
3572 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
3574 /* Recursive function, which scans prefix literals. */
3575 BOOL last, any, class, caseless;
3576 int len, repeat, len_save, consumed = 0;
3577 sljit_u32 chr; /* Any unicode character. */
3578 sljit_u8 *bytes, *bytes_end, byte;
3579 pcre_uchar *alternative, *cc_save, *oc;
3580 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3581 pcre_uchar othercase[8];
3582 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3583 pcre_uchar othercase[2];
3584 #else
3585 pcre_uchar othercase[1];
3586 #endif
3588 repeat = 1;
3589 while (TRUE)
/* Give up once the work budget is spent. */
3591 if (*rec_count == 0)
3592 return 0;
3593 (*rec_count)--;
/* Per item flags: last = stop after this item, any = the item accepts any
character, class = the item is a bitmap class, caseless = add other case. */
3595 last = TRUE;
3596 any = FALSE;
3597 class = FALSE;
3598 caseless = FALSE;
3600 switch (*cc)
3602 case OP_CHARI:
3603 caseless = TRUE;
/* Fall through. */
3604 case OP_CHAR:
3605 last = FALSE;
3606 cc++;
3607 break;
3609 case OP_SOD:
3610 case OP_SOM:
3611 case OP_SET_SOM:
3612 case OP_NOT_WORD_BOUNDARY:
3613 case OP_WORD_BOUNDARY:
3614 case OP_EODN:
3615 case OP_EOD:
3616 case OP_CIRC:
3617 case OP_CIRCM:
3618 case OP_DOLL:
3619 case OP_DOLLM:
3620 /* Zero width assertions. */
3621 cc++;
3622 continue;
3624 case OP_ASSERT:
3625 case OP_ASSERT_NOT:
3626 case OP_ASSERTBACK:
3627 case OP_ASSERTBACK_NOT:
/* The whole assertion is skipped. */
3628 cc = bracketend(cc);
3629 continue;
3631 case OP_PLUSI:
3632 case OP_MINPLUSI:
3633 case OP_POSPLUSI:
3634 caseless = TRUE;
/* Fall through. */
3635 case OP_PLUS:
3636 case OP_MINPLUS:
3637 case OP_POSPLUS:
/* Only the first occurrence is recorded; `last` stays TRUE. */
3638 cc++;
3639 break;
3641 case OP_EXACTI:
3642 caseless = TRUE;
/* Fall through. */
3643 case OP_EXACT:
3644 repeat = GET2(cc, 1);
3645 last = FALSE;
3646 cc += 1 + IMM2_SIZE;
3647 break;
3649 case OP_QUERYI:
3650 case OP_MINQUERYI:
3651 case OP_POSQUERYI:
3652 caseless = TRUE;
/* Fall through. */
3653 case OP_QUERY:
3654 case OP_MINQUERY:
3655 case OP_POSQUERY:
/* Optional character: first scan what follows it (cc + len) into the same
positions, then record the character itself. */
3656 len = 1;
3657 cc++;
3658 #ifdef SUPPORT_UTF
3659 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3660 #endif
3661 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3662 if (max_chars == 0)
3663 return consumed;
3664 last = FALSE;
3665 break;
3667 case OP_KET:
3668 cc += 1 + LINK_SIZE;
3669 continue;
3671 case OP_ALT:
3672 cc += GET(cc, 1);
3673 continue;
3675 case OP_ONCE:
3676 case OP_ONCE_NC:
3677 case OP_BRA:
3678 case OP_BRAPOS:
3679 case OP_CBRA:
3680 case OP_CBRAPOS:
/* Every further alternative is scanned recursively into the same
positions; the first alternative is then processed by this loop. */
3681 alternative = cc + GET(cc, 1);
3682 while (*alternative == OP_ALT)
3684 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3685 if (max_chars == 0)
3686 return consumed;
3687 alternative += GET(alternative, 1);
3690 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3691 cc += IMM2_SIZE;
3692 cc += 1 + LINK_SIZE;
3693 continue;
3695 case OP_CLASS:
3696 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3697 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
3698 return consumed;
3699 #endif
3700 class = TRUE;
3701 break;
3703 case OP_NCLASS:
3704 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3705 if (common->utf) return consumed;
3706 #endif
3707 class = TRUE;
3708 break;
3710 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3711 case OP_XCLASS:
3712 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3713 if (common->utf) return consumed;
3714 #endif
3715 any = TRUE;
3716 cc += GET(cc, 1);
3717 break;
3718 #endif
3720 case OP_DIGIT:
3721 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3722 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3723 return consumed;
3724 #endif
3725 any = TRUE;
3726 cc++;
3727 break;
3729 case OP_WHITESPACE:
3730 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3731 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3732 return consumed;
3733 #endif
3734 any = TRUE;
3735 cc++;
3736 break;
3738 case OP_WORDCHAR:
3739 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3740 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3741 return consumed;
3742 #endif
3743 any = TRUE;
3744 cc++;
3745 break;
3747 case OP_NOT:
3748 case OP_NOTI:
/* Skips the opcode; the shared cc++ below skips the character unit. */
3749 cc++;
3750 /* Fall through. */
3751 case OP_NOT_DIGIT:
3752 case OP_NOT_WHITESPACE:
3753 case OP_NOT_WORDCHAR:
3754 case OP_ANY:
3755 case OP_ALLANY:
3756 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3757 if (common->utf) return consumed;
3758 #endif
3759 any = TRUE;
3760 cc++;
3761 break;
3763 #ifdef SUPPORT_UTF
3764 case OP_NOTPROP:
3765 case OP_PROP:
3766 #ifndef COMPILE_PCRE32
3767 if (common->utf) return consumed;
3768 #endif
3769 any = TRUE;
3770 cc += 1 + 2;
3771 break;
3772 #endif
3774 case OP_TYPEEXACT:
/* Only the repeat count is taken here; the type opcode that follows is
handled by the next iteration. */
3775 repeat = GET2(cc, 1);
3776 cc += 1 + IMM2_SIZE;
3777 continue;
3779 case OP_NOTEXACT:
3780 case OP_NOTEXACTI:
3781 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3782 if (common->utf) return consumed;
3783 #endif
3784 any = TRUE;
3785 repeat = GET2(cc, 1);
3786 cc += 1 + IMM2_SIZE + 1;
3787 break;
3789 default:
3790 return consumed;
/* Items accepting any character: mark `repeat` positions with 255. */
3793 if (any)
3797 chars[0] = 255;
3799 consumed++;
3800 if (--max_chars == 0)
3801 return consumed;
3802 chars += MAX_DIFF_CHARS;
3804 while (--repeat > 0);
3806 repeat = 1;
3807 continue;
/* Bitmap classes: `bytes` is the 32 byte bitmap that follows the opcode. */
3810 if (class)
3812 bytes = (sljit_u8*) (cc + 1);
3813 cc += 1 + 32 / sizeof(pcre_uchar);
3815 switch (*cc)
3817 case OP_CRSTAR:
3818 case OP_CRMINSTAR:
3819 case OP_CRPOSSTAR:
3820 case OP_CRQUERY:
3821 case OP_CRMINQUERY:
3822 case OP_CRPOSQUERY:
/* The class may match nothing: scan what follows the repeat opcode too. */
3823 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3824 if (max_chars == 0)
3825 return consumed;
3826 break;
3828 default:
3829 case OP_CRPLUS:
3830 case OP_CRMINPLUS:
3831 case OP_CRPOSPLUS:
3832 break;
3834 case OP_CRRANGE:
3835 case OP_CRMINRANGE:
3836 case OP_CRPOSRANGE:
/* The first GET2 value is used as the number of mandatory repetitions. */
3837 repeat = GET2(cc, 1);
3838 if (repeat <= 0)
3839 return consumed;
3840 break;
/* A class containing character 255 (top bit of the last bitmap byte) is
treated as "any character". */
3845 if (bytes[31] & 0x80)
3846 chars[0] = 255;
3847 else if (chars[0] != 255)
3849 bytes_end = bytes + 32;
3850 chr = 0;
/* Walk the bitmap, lowest bit first, and add every character whose bit is
set. Empty bytes are skipped eight characters at a time. */
3853 byte = *bytes++;
3854 SLJIT_ASSERT((chr & 0x7) == 0);
3855 if (byte == 0)
3856 chr += 8;
3857 else
3861 if ((byte & 0x1) != 0)
3862 add_prefix_char(chr, chars);
3863 byte >>= 1;
3864 chr++;
3866 while (byte != 0);
/* Round up to the first character of the next bitmap byte. */
3867 chr = (chr + 7) & ~7;
3870 while (chars[0] != 255 && bytes < bytes_end);
/* Rewind to the start of the bitmap for the next repetition. */
3871 bytes = bytes_end - 32;
3874 consumed++;
3875 if (--max_chars == 0)
3876 return consumed;
3877 chars += MAX_DIFF_CHARS;
3879 while (--repeat > 0);
3881 switch (*cc)
3883 case OP_CRSTAR:
3884 case OP_CRMINSTAR:
3885 case OP_CRPOSSTAR:
3886 return consumed;
3888 case OP_CRQUERY:
3889 case OP_CRMINQUERY:
3890 case OP_CRPOSQUERY:
3891 cc++;
3892 break;
3894 case OP_CRRANGE:
3895 case OP_CRMINRANGE:
3896 case OP_CRPOSRANGE:
/* Scanning continues only when both GET2 values of the range are equal. */
3897 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3898 return consumed;
3899 cc += 1 + 2 * IMM2_SIZE;
3900 break;
3903 repeat = 1;
3904 continue;
/* Literal character: len is its length in code units. */
3907 len = 1;
3908 #ifdef SUPPORT_UTF
3909 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3910 #endif
3912 if (caseless && char_has_othercase(common, cc))
3914 #ifdef SUPPORT_UTF
3915 if (common->utf)
/* Stop when the other case is encoded with a different number of units. */
3917 GETCHAR(chr, cc);
3918 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3919 return consumed;
3921 else
3922 #endif
3924 chr = *cc;
3925 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3928 else
3930 caseless = FALSE;
3931 othercase[0] = 0; /* Stops compiler warning - PH */
/* Record the character unit by unit, `repeat` times. cc and len are
restored from cc_save / len_save before each further repetition. */
3934 len_save = len;
3935 cc_save = cc;
3936 while (TRUE)
3938 oc = othercase;
3941 chr = *cc;
3942 add_prefix_char(*cc, chars);
3944 if (caseless)
3945 add_prefix_char(*oc, chars);
3947 len--;
3948 consumed++;
3949 if (--max_chars == 0)
3950 return consumed;
3951 chars += MAX_DIFF_CHARS;
3952 cc++;
3953 oc++;
3955 while (len > 0);
3957 if (--repeat == 0)
3958 break;
3960 len = len_save;
3961 cc = cc_save;
3964 repeat = 1;
/* Items that left `last` set end the scan. */
3965 if (last)
3966 return consumed;
3970 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3972 static sljit_s32 character_to_int32(pcre_uchar chr)
3974 sljit_s32 value = (sljit_s32)chr;
3975 #if defined COMPILE_PCRE8
3976 #define SSE2_COMPARE_TYPE_INDEX 0
3977 return ((unsigned int)value << 24) | ((unsigned int)value << 16) | ((unsigned int)value << 8) | (unsigned int)value;
3978 #elif defined COMPILE_PCRE16
3979 #define SSE2_COMPARE_TYPE_INDEX 1
3980 return ((unsigned int)value << 16) | value;
3981 #elif defined COMPILE_PCRE32
3982 #define SSE2_COMPARE_TYPE_INDEX 2
3983 return value;
3984 #else
3985 #error "Unsupported unit width"
3986 #endif
/* Emits an SSE2 based search that moves STR_PTR forward to the first code
unit equal to char1 or char2. When nothing is found, STR_PTR ends up at or
beyond STR_END. The instructions are emitted as raw bytes through
sljit_emit_op_custom. TMP1, TMP2 and (when two compares are needed)
RETURN_ADDR are overwritten by the emitted code.
The code is emitted twice: once for the first, possibly unaligned 16 byte
block and once as the loop over the following aligned blocks. */
3989 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3991 DEFINE_COMPILER;
3992 struct sljit_label *start;
3993 struct sljit_jump *quit[3];
3994 struct sljit_jump *nomatch;
3995 sljit_u8 instruction[8];
3996 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3997 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3998 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3999 BOOL load_twice = FALSE;
4000 pcre_uchar bit;
/* bit keeps char1 ^ char2 only when is_powerof2 accepts it, otherwise it
is 0. With a non-zero bit, the data is OR-ed with the replicated bit and
compared once against char1 | bit. */
4002 bit = char1 ^ char2;
4003 if (!is_powerof2(bit))
4004 bit = 0;
/* Different characters without a usable bit need two compares, so every
block is loaded into two registers. */
4006 if ((char1 != char2) && bit == 0)
4007 load_twice = TRUE;
4009 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4011 /* First part (unaligned start) */
4013 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
4015 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
/* The ModRM byte is 0xc0 | (reg << 3) | rm; the reg field selects the
destination register of each instruction below. */
4017 /* MOVD xmm, r/m32 */
4018 instruction[0] = 0x66;
4019 instruction[1] = 0x0f;
4020 instruction[2] = 0x6e;
4021 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
4022 sljit_emit_op_custom(compiler, instruction, 4);
4024 if (char1 != char2)
/* The second constant is the replicated bit, or char2 when bit is 0. */
4026 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
4028 /* MOVD xmm, r/m32 */
4029 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
4030 sljit_emit_op_custom(compiler, instruction, 4);
/* Shuffle control 0 copies the lowest 32 bits into all four lanes. */
4033 /* PSHUFD xmm1, xmm2/m128, imm8 */
4034 instruction[2] = 0x70;
4035 instruction[3] = 0xc0 | (2 << 3) | 2;
4036 instruction[4] = 0;
4037 sljit_emit_op_custom(compiler, instruction, 5);
4039 if (char1 != char2)
4041 /* PSHUFD xmm1, xmm2/m128, imm8 */
4042 instruction[3] = 0xc0 | (3 << 3) | 3;
4043 instruction[4] = 0;
4044 sljit_emit_op_custom(compiler, instruction, 5);
/* TMP2 = offset of STR_PTR inside its 16 byte block; STR_PTR itself is
rounded down to the block start so that the aligned load can be used. */
4047 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
4048 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
4050 /* MOVDQA xmm1, xmm2/m128 */
4051 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4053 if (str_ptr_ind < 8)
4055 instruction[2] = 0x6f;
4056 instruction[3] = (0 << 3) | str_ptr_ind;
4057 sljit_emit_op_custom(compiler, instruction, 4);
4059 if (load_twice)
4061 instruction[3] = (1 << 3) | str_ptr_ind;
4062 sljit_emit_op_custom(compiler, instruction, 4);
4065 else
/* Register index of 8 or above: a 0x41 (REX.B) prefix is inserted after
0x66 and only the low three bits of the index go into the rm field. For
indexes 8..15 the unmasked OR with (1 << 3) below yields the same byte as
the masked form, since bit 3 is already set in the index. */
4067 instruction[1] = 0x41;
4068 instruction[2] = 0x0f;
4069 instruction[3] = 0x6f;
4070 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4071 sljit_emit_op_custom(compiler, instruction, 5);
4073 if (load_twice)
4075 instruction[4] = (1 << 3) | str_ptr_ind;
4076 sljit_emit_op_custom(compiler, instruction, 5);
/* Restore the opcode escape byte for the four byte instructions below. */
4078 instruction[1] = 0x0f;
4081 #else
4083 instruction[2] = 0x6f;
4084 instruction[3] = (0 << 3) | str_ptr_ind;
4085 sljit_emit_op_custom(compiler, instruction, 4);
4087 if (load_twice)
4089 instruction[3] = (1 << 3) | str_ptr_ind;
4090 sljit_emit_op_custom(compiler, instruction, 4);
4093 #endif
4095 if (bit != 0)
4097 /* POR xmm1, xmm2/m128 */
4098 instruction[2] = 0xeb;
4099 instruction[3] = 0xc0 | (0 << 3) | 3;
4100 sljit_emit_op_custom(compiler, instruction, 4);
/* Opcode 0x74 + SSE2_COMPARE_TYPE_INDEX selects the byte, word or dword
compare that matches the code unit width. */
4103 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4104 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4105 instruction[3] = 0xc0 | (0 << 3) | 2;
4106 sljit_emit_op_custom(compiler, instruction, 4);
4108 if (load_twice)
4110 instruction[3] = 0xc0 | (1 << 3) | 3;
4111 sljit_emit_op_custom(compiler, instruction, 4);
4114 /* PMOVMSKB reg, xmm */
4115 instruction[2] = 0xd7;
4116 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4117 sljit_emit_op_custom(compiler, instruction, 4);
4119 if (load_twice)
/* The second mask is written to TMP2, so the block offset held in TMP2 is
kept in RETURN_ADDR while the two masks are combined. */
4121 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4122 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4123 sljit_emit_op_custom(compiler, instruction, 4);
4125 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4126 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
/* Shift out the mask bits of the bytes that precede the original STR_PTR. */
4129 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4131 /* BSF r32, r/m32 */
4132 instruction[0] = 0x0f;
4133 instruction[1] = 0xbc;
4134 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4135 sljit_emit_op_custom(compiler, instruction, 3);
/* Tell sljit that the custom instruction has set the zero flag. */
4136 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4138 nomatch = JUMP(SLJIT_ZERO);
/* Match in the first block: STR_PTR = block start + block offset + index
of the first set mask bit. */
4140 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4141 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4142 quit[1] = JUMP(SLJIT_JUMP);
4144 JUMPHERE(nomatch);
/* Loop over the following blocks, 16 bytes per iteration. */
4146 start = LABEL();
4147 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4148 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4150 /* Second part (aligned) */
4152 instruction[0] = 0x66;
4153 instruction[1] = 0x0f;
4155 /* MOVDQA xmm1, xmm2/m128 */
4156 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4158 if (str_ptr_ind < 8)
4160 instruction[2] = 0x6f;
4161 instruction[3] = (0 << 3) | str_ptr_ind;
4162 sljit_emit_op_custom(compiler, instruction, 4);
4164 if (load_twice)
4166 instruction[3] = (1 << 3) | str_ptr_ind;
4167 sljit_emit_op_custom(compiler, instruction, 4);
4170 else
/* Same prefixed encoding as in the first part. */
4172 instruction[1] = 0x41;
4173 instruction[2] = 0x0f;
4174 instruction[3] = 0x6f;
4175 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4176 sljit_emit_op_custom(compiler, instruction, 5);
4178 if (load_twice)
4180 instruction[4] = (1 << 3) | str_ptr_ind;
4181 sljit_emit_op_custom(compiler, instruction, 5);
4183 instruction[1] = 0x0f;
4186 #else
4188 instruction[2] = 0x6f;
4189 instruction[3] = (0 << 3) | str_ptr_ind;
4190 sljit_emit_op_custom(compiler, instruction, 4);
4192 if (load_twice)
4194 instruction[3] = (1 << 3) | str_ptr_ind;
4195 sljit_emit_op_custom(compiler, instruction, 4);
4198 #endif
4200 if (bit != 0)
4202 /* POR xmm1, xmm2/m128 */
4203 instruction[2] = 0xeb;
4204 instruction[3] = 0xc0 | (0 << 3) | 3;
4205 sljit_emit_op_custom(compiler, instruction, 4);
4208 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4209 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4210 instruction[3] = 0xc0 | (0 << 3) | 2;
4211 sljit_emit_op_custom(compiler, instruction, 4);
4213 if (load_twice)
4215 instruction[3] = 0xc0 | (1 << 3) | 3;
4216 sljit_emit_op_custom(compiler, instruction, 4);
4219 /* PMOVMSKB reg, xmm */
4220 instruction[2] = 0xd7;
4221 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4222 sljit_emit_op_custom(compiler, instruction, 4);
4224 if (load_twice)
/* TMP2 holds no live value inside the loop, so it is not saved here. */
4226 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4227 sljit_emit_op_custom(compiler, instruction, 4);
4229 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4232 /* BSF r32, r/m32 */
4233 instruction[0] = 0x0f;
4234 instruction[1] = 0xbc;
4235 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4236 sljit_emit_op_custom(compiler, instruction, 3);
4237 sljit_set_current_flags(compiler, SLJIT_SET_Z);
/* An empty mask means no match in this block: continue with the next one. */
4239 JUMPTO(SLJIT_ZERO, start);
4241 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
/* Common exit: all three quit jumps are bound to this label. */
4243 start = LABEL();
4244 SET_LABEL(quit[0], start);
4245 SET_LABEL(quit[1], start);
4246 SET_LABEL(quit[2], start);
4249 #undef SSE2_COMPARE_TYPE_INDEX
4251 #endif
/* Emits code that moves STR_PTR forward to the first start position whose
code unit at index `offset` equals char1 or char2 (pass the same value
twice to search for a single character).
When a first line limit is tracked (common->match_end_ptr != 0), STR_END
is narrowed for the duration of the search and restored from TMP3.
On x86 with SSE2 (and without SUPPORT_VALGRIND) the search is delegated to
fast_forward_first_char2_sse2; otherwise a unit by unit loop is emitted. */
4253 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
4255 DEFINE_COMPILER;
4256 struct sljit_label *start;
4257 struct sljit_jump *quit;
4258 struct sljit_jump *found;
4259 pcre_uchar mask;
4260 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4261 struct sljit_label *utf_start = NULL;
4262 struct sljit_jump *utf_quit = NULL;
4263 #endif
4264 BOOL has_match_end = (common->match_end_ptr != 0);
/* The search runs on the unit at `offset`, so STR_PTR is moved there first
and moved back before returning. */
4266 if (offset > 0)
4267 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4269 if (has_match_end)
/* STR_END = min(original STR_END, stored match end + offset + 1 units);
the original value is kept in TMP3. */
4271 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4273 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4274 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4275 sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
/* Re-entry point used when a hit turns out to start inside a UTF character. */
4278 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4279 if (common->utf && offset > 0)
4280 utf_start = LABEL();
4281 #endif
4283 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
4285 /* SSE2 accelerated first character search. */
4287 if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
4289 fast_forward_first_char2_sse2(common, char1, char2);
4291 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4292 if (common->mode == JIT_COMPILE)
4294 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4295 SLJIT_ASSERT(common->forced_quit_label == NULL);
4296 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4297 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4299 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4300 if (common->utf && offset > 0)
4302 SLJIT_ASSERT(common->mode == JIT_COMPILE);
/* Check the unit at the would-be start position (`offset` units back).
If it is a UTF-8 continuation byte (10xxxxxx) or a UTF-16 low surrogate
(0xdc00..0xdfff), the search restarts one unit further at utf_start. */
4304 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4305 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4306 #if defined COMPILE_PCRE8
4307 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4308 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4309 #elif defined COMPILE_PCRE16
4310 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4311 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4312 #else
4313 #error "Unknown code width"
4314 #endif
4315 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4317 #endif
4319 if (offset > 0)
4320 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4322 else
/* Other modes: a STR_PTR at or beyond STR_END is replaced by the stored
match end position, or by STR_END when no such position is tracked. */
4324 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4325 if (has_match_end)
4327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4328 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
4330 else
4331 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
4334 if (has_match_end)
4335 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4336 return;
4339 #endif
/* Generic search loop: one code unit per iteration. */
4341 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4343 start = LABEL();
4344 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4346 if (char1 == char2)
4347 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4348 else
4350 mask = char1 ^ char2;
4351 if (is_powerof2(mask))
/* OR-ing the mask into the unit allows a single compare for both. */
4353 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4354 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4356 else
/* Two compares, combined into one flag value in TMP2. */
4358 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4359 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
4360 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4361 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
4362 found = JUMP(SLJIT_NOT_ZERO);
4366 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4367 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
/* End of input without a hit: skip the UTF start position check below. */
4369 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4370 if (common->utf && offset > 0)
4371 utf_quit = JUMP(SLJIT_JUMP);
4372 #endif
4374 JUMPHERE(found);
4376 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4377 if (common->utf && offset > 0)
/* Same start position check as in the SSE2 path. */
4379 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4380 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4381 #if defined COMPILE_PCRE8
4382 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4383 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4384 #elif defined COMPILE_PCRE16
4385 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4386 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4387 #else
4388 #error "Unknown code width"
4389 #endif
4390 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4391 JUMPHERE(utf_quit);
4393 #endif
4395 JUMPHERE(quit);
4397 if (has_match_end)
/* Nothing found before the narrowed end: continue from the stored match
end position (plus offset, which is subtracted again below). STR_END is
then restored from TMP3. */
4399 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4400 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4401 if (offset > 0)
4402 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4403 JUMPHERE(quit);
4404 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
/* Move back from the inspected unit to the start position. */
4407 if (offset > 0)
4408 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
/* Emits a fast forward search based on the first (up to MAX_N_CHARS)
character positions of the pattern, as collected by scan_prefix.
Returns FALSE when no search code was emitted (no usable prefix) and TRUE
otherwise. TRUE is also returned, without emitting code, when the skip
table cannot be allocated. */
4411 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4413 DEFINE_COMPILER;
4414 struct sljit_label *start;
4415 struct sljit_jump *quit;
4416 struct sljit_jump *match;
4417 /* chars[i * MAX_DIFF_CHARS] is the number of different characters stored
4418 for position i (0 to MAX_DIFF_CHARS - 1), 255 represents any character. */
4419 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4420 sljit_s32 offset;
4421 pcre_uchar mask;
4422 pcre_uchar *char_set, *char_set_end;
4423 int i, max, from;
4424 int range_right = -1, range_len;
4425 sljit_u8 *update_table = NULL;
4426 BOOL in_range;
4427 sljit_u32 rec_count;
/* Start with an empty character list for every position. */
4429 for (i = 0; i < MAX_N_CHARS; i++)
4430 chars[i * MAX_DIFF_CHARS] = 0;
/* rec_count limits the amount of work done by scan_prefix. */
4432 rec_count = 10000;
4433 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4435 if (max < 1)
4436 return FALSE;
/* Find the longest run of consecutive positions with a restricted
character list (count below 255). Only runs of at least 4 positions are
accepted; range_right is the last position of the selected run and
range_len its length. */
4438 in_range = FALSE;
4439 /* Prevent compiler "uninitialized" warning */
4440 from = 0;
4441 range_len = 4 /* minimum length */ - 1;
4442 for (i = 0; i <= max; i++)
4444 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4446 range_len = i - from;
4447 range_right = i - 1;
4450 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4452 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4453 if (!in_range)
4455 in_range = TRUE;
4456 from = i;
4459 else
4460 in_range = FALSE;
4463 if (range_right >= 0)
/* Build a 256 entry skip table indexed by the low 8 bits of a character.
The default step is range_len units; a character that occurs i positions
before range_right lowers its entry to i units, so characters allowed at
range_right itself get a step of 0. */
4465 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
/* NOTE(review): presumably the allocation failure is reported through the
compiler state - confirm in allocate_read_only_data. */
4466 if (update_table == NULL)
4467 return TRUE;
4468 memset(update_table, IN_UCHARS(range_len), 256);
4470 for (i = 0; i < range_len; i++)
4472 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4473 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4474 char_set_end = char_set + char_set[0];
4475 char_set++;
4476 while (char_set <= char_set_end)
4478 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4479 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4480 char_set++;
/* Select a position (offset) that has at most two possible characters.
It is used for an exact compare. A later position with a single character
replaces a two character one, and a two character position whose
characters pass is_powerof2(c1 ^ c2) replaces one that does not. */
4485 offset = -1;
4486 /* Scan forward. */
4487 for (i = 0; i < max; i++)
4489 if (offset == -1)
4491 if (chars[i * MAX_DIFF_CHARS] <= 2)
4492 offset = i;
4494 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4496 if (chars[i * MAX_DIFF_CHARS] == 1)
4497 offset = i;
4498 else
4500 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4501 if (!is_powerof2(mask))
4503 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4504 if (is_powerof2(mask))
4505 offset = i;
/* Without a skip table, fall back to the one or two character search. */
4511 if (range_right < 0)
4513 if (offset < 0)
4514 return FALSE;
4515 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4516 /* Works regardless the value is 1 or 2. */
4517 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4518 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4519 return TRUE;
/* The skip table already checks position range_right, so no extra compare
is emitted for that position. */
4522 if (range_right == offset)
4523 offset = -1;
4525 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
/* STR_END is lowered by max - 1 units for the duration of the search (and
clamped to the stored match end position when one is tracked). */
4527 max -= 1;
4528 SLJIT_ASSERT(max > 0);
4529 if (common->match_end_ptr != 0)
4531 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4532 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4533 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4534 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4535 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4536 JUMPHERE(quit);
4538 else
4539 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4541 SLJIT_ASSERT(range_right >= 0);
/* Except on 32 bit x86, the table address is kept in RETURN_ADDR. */
4543 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4544 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4545 #endif
4547 start = LABEL();
4548 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Load one byte of the unit at position range_right: the unit itself in
8 bit mode or on little endian targets, otherwise the last byte of it. */
4550 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4551 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4552 #else
4553 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4554 #endif
/* Advance by the table entry and repeat until the entry is 0. */
4556 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4557 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4558 #else
4559 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4560 #endif
4561 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4562 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4564 if (offset >= 0)
/* Exact compare of the unit at `offset`. STR_PTR is already incremented
here, so a mismatch restarts the search at the next unit. */
4566 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4567 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4569 if (chars[offset * MAX_DIFF_CHARS] == 1)
4570 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4571 else
4573 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4574 if (is_powerof2(mask))
4576 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4577 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4579 else
4581 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4582 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4583 JUMPHERE(match);
/* In UTF mode, a candidate that starts with a UTF-8 continuation byte or a
UTF-16 low surrogate is rejected and the search restarts. With offset == 0
the exact compare above has already inspected the first unit. */
4588 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4589 if (common->utf && offset != 0)
4591 if (offset < 0)
4593 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4596 else
4597 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4598 #if defined COMPILE_PCRE8
4599 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4600 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4601 #elif defined COMPILE_PCRE16
4602 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4603 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4604 #else
4605 #error "Unknown code width"
4606 #endif
4607 if (offset < 0)
4608 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4610 #endif
/* Undo the increment done before the exact compare. */
4612 if (offset >= 0)
4613 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4615 JUMPHERE(quit);
/* Restore STR_END. When a match end position is tracked, STR_PTR is also
limited to that position. */
4617 if (common->match_end_ptr != 0)
4619 if (range_right >= 0)
4620 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4621 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4622 if (range_right >= 0)
4624 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4625 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4626 JUMPHERE(quit);
4629 else
4630 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4631 return TRUE;
4634 #undef MAX_N_CHARS
4635 #undef MAX_DIFF_CHARS
4637 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4639 pcre_uchar oc;
4641 oc = first_char;
4642 if (caseless)
4644 oc = TABLE_GET(first_char, common->fcc, first_char);
4645 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4646 if (first_char > 127 && common->utf)
4647 oc = UCD_OTHERCASE(first_char);
4648 #endif
4651 fast_forward_first_char2(common, first_char, oc, 0);
/* Emits code that moves STR_PTR forward until it is positioned just after a
newline sequence (or until the end of the subject is reached).

If common->match_end_ptr is non-zero, STR_END is saved in TMP3, replaced by
the value stored at match_end_ptr for the duration of the scan, and restored
before leaving. TMP1 and TMP2 are used as scratch registers.

Two code shapes are generated:
  - a fixed newline whose value is above 255 (two code units): a simple loop
    comparing the two units before STR_PTR with the high and low byte of
    common->newline;
  - every other newline type: a loop built from read_char_range() and
    check_newlinechar(). */
4654 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4656 DEFINE_COMPILER;
4657 struct sljit_label *loop;
4658 struct sljit_jump *lastchar;
4659 struct sljit_jump *firstchar;
4660 struct sljit_jump *quit;
4661 struct sljit_jump *foundcr = NULL;
4662 struct sljit_jump *notfoundnl;
4663 jump_list *newline = NULL;
/* Temporarily limit the scan to the stored match end; TMP3 keeps STR_END. */
4665 if (common->match_end_ptr != 0)
4667 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4668 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
/* Two-unit fixed newline. */
4671 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4673 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4674 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4675 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4676 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Nothing to do when STR_PTR is not beyond jit_arguments.str. */
4677 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Step back one code unit, but only if STR_PTR is at least two units past
jit_arguments.begin: TMP2 becomes 1 or 0 and is scaled to the unit size. */
4679 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4680 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4681 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
4682 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4683 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4684 #endif
4685 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Advance one unit at a time until the two preceding units equal the
newline pair, or STR_END is reached. */
4687 loop = LABEL();
4688 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4689 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4690 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4691 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4692 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4693 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4695 JUMPHERE(quit);
4696 JUMPHERE(firstchar);
4697 JUMPHERE(lastchar);
4699 if (common->match_end_ptr != 0)
4700 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4701 return;
/* All other newline types. */
4704 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4705 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
/* Nothing to do when STR_PTR is not beyond jit_arguments.str; otherwise
re-examine the previous character first. */
4706 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4707 skip_char_back(common);
/* The loop label is also recorded in common->ff_newline_shortcut. */
4709 loop = LABEL();
4710 common->ff_newline_shortcut = loop;
4712 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4713 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* For ANY / ANYCRLF a CR needs the CRLF handling further down. */
4714 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4715 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected by check_newlinechar() are bound to the loop label,
i.e. they continue the scan (presumably the "not a newline" exits - confirm
against check_newlinechar). */
4716 check_newlinechar(common, common->nltype, &newline, FALSE);
4717 set_jumps(newline, loop);
4719 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4721 quit = JUMP(SLJIT_JUMP);
4722 JUMPHERE(foundcr);
/* After a CR: if the next unit is NL, skip it too. TMP1 becomes 1 or 0,
scaled to the code unit size, and is added to STR_PTR. */
4723 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4724 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4725 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4726 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4727 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4728 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4729 #endif
4730 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4731 JUMPHERE(notfoundnl);
4732 JUMPHERE(quit);
4734 JUMPHERE(lastchar);
4735 JUMPHERE(firstchar);
/* Restore the real subject end. */
4737 if (common->match_end_ptr != 0)
4738 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
/* Forward declaration: check_class_ranges() is defined later in the file but
is used by fast_forward_start_bits() below. */
4741 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
/* Emits code that advances STR_PTR until the character at STR_PTR is one
whose bit is set in the 256-bit start_bits map, or until STR_END is reached.

If common->match_end_ptr is non-zero, STR_END is saved in RETURN_ADDR,
replaced by the value stored at match_end_ptr during the scan, and restored
at the end. TMP1 and TMP2 are scratch; in UTF mode TMP3 keeps a copy of the
first code unit so that the length of the character can be computed after
TMP1 has been overwritten by the membership test. */
4743 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4745 DEFINE_COMPILER;
4746 struct sljit_label *start;
4747 struct sljit_jump *quit;
4748 struct sljit_jump *found = NULL;
4749 jump_list *matches = NULL;
4750 #ifndef COMPILE_PCRE8
4751 struct sljit_jump *jump;
4752 #endif
4754 if (common->match_end_ptr != 0)
4756 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4757 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4760 start = LABEL();
4761 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4762 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4763 #ifdef SUPPORT_UTF
4764 if (common->utf)
4765 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4766 #endif
/* First try to express the bitmap as a few range comparisons. The call uses
invert == TRUE, so the jumps collected in "matches" are taken for characters
that ARE in the set. If that is not possible, fall back to a bitmap lookup. */
4768 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4770 #ifndef COMPILE_PCRE8
/* Code units of 255 and above are all tested as 255. */
4771 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4772 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4773 JUMPHERE(jump);
4774 #endif
/* TMP2 = bit index (c & 7), TMP1 = start_bits[c >> 3]; test 1 << TMP2. */
4775 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4776 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4777 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4778 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4779 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4780 found = JUMP(SLJIT_NOT_ZERO);
/* Not a start character: restore the code unit (UTF mode) and skip over the
whole character before trying again. */
4783 #ifdef SUPPORT_UTF
4784 if (common->utf)
4785 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4786 #endif
4787 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4788 #ifdef SUPPORT_UTF
4789 #if defined COMPILE_PCRE8
4790 if (common->utf)
/* UTF-8: for a first byte of 0xc0 or above, add the value read from
utf8_table4 (indexed by byte - 0xc0) to STR_PTR. */
4792 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4793 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4794 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4796 #elif defined COMPILE_PCRE16
4797 if (common->utf)
/* UTF-16: when (unit & 0xfc00) == 0xd800, advance by 2 more bytes
(TMP1 is 1 or 0, shifted left by one). */
4799 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4800 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4801 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4802 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4803 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4804 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4806 #endif /* COMPILE_PCRE[8|16] */
4807 #endif /* SUPPORT_UTF */
4808 JUMPTO(SLJIT_JUMP, start);
/* Exit points: a start character was found, or the subject end was hit. */
4809 if (found != NULL)
4810 JUMPHERE(found);
4811 if (matches != NULL)
4812 set_jumps(matches, LABEL());
4813 JUMPHERE(quit);
4815 if (common->match_end_ptr != 0)
4816 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
/* Emits code that checks whether the required character (req_char, or its
other case when caseless is set) occurs in the remaining subject.

The search is skipped entirely when STR_PTR + REQ_BYTE_MAX is still below
STR_END, or when STR_PTR is below the position cached in the stack slot
common->req_char_ptr. When the character is found, its position is stored
in that slot. If has_firstchar is set, the search starts one code unit after
STR_PTR.

Returns the jump that is taken when the scan reaches STR_END without finding
the character; the caller has to bind it. TMP1 and TMP2 are used as scratch.

NOTE(review): REQ_BYTE_MAX is added to STR_PTR as a raw offset, without
IN_UCHARS(); confirm this is intended for the 16- and 32-bit libraries. */
4819 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4821 DEFINE_COMPILER;
4822 struct sljit_label *loop;
4823 struct sljit_jump *toolong;
4824 struct sljit_jump *alreadyfound;
4825 struct sljit_jump *found;
4826 struct sljit_jump *foundoc = NULL;
4827 struct sljit_jump *notfound;
4828 sljit_u32 oc, bit;
4830 SLJIT_ASSERT(common->req_char_ptr != 0);
4831 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4832 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4833 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4834 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
/* TMP1 is the scan pointer. */
4836 if (has_firstchar)
4837 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4838 else
4839 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4841 loop = LABEL();
4842 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4844 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Work out the other case at compile time. */
4845 oc = req_char;
4846 if (caseless)
4848 oc = TABLE_GET(req_char, common->fcc, req_char);
4849 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4850 if (req_char > 127 && common->utf)
4851 oc = UCD_OTHERCASE(req_char);
4852 #endif
4854 if (req_char == oc)
4855 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4856 else
4858 bit = req_char ^ oc;
/* When the two cases differ in a single bit, one comparison is enough after
forcing that bit on; otherwise both values are compared. */
4859 if (is_powerof2(bit))
4861 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4862 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4864 else
4866 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4867 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4870 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4871 JUMPTO(SLJIT_JUMP, loop);
/* Found: remember the position for later calls. */
4873 JUMPHERE(found);
4874 if (foundoc)
4875 JUMPHERE(foundoc);
4876 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4877 JUMPHERE(alreadyfound);
4878 JUMPHERE(toolong);
4879 return notfound;
/* Emits the shared fast-call helper that walks the saved frame records below
STACK_TOP and writes the saved values back into local variables.

The return address is kept in RETURN_ADDR. STACK_TOP is saved in TMP3 on
entry and restored before returning, so the walk does not change STACK_TOP
for the caller. TMP1 receives the value produced by GET_LOCAL_BASE
(presumably the base address of the locals - confirm against the macro) and
TMP2 holds the current record header.

Each record is identified by the word just below the walk pointer:
  - positive: an offset from the local base; two saved words are copied to
    that location and the following word, and the record is 3 words long;
  - negative: the negated value is the offset; one saved word is copied, and
    the record is 2 words long;
  - zero: end of the records. */
4882 static void do_revertframes(compiler_common *common)
4884 DEFINE_COMPILER;
4885 struct sljit_jump *jump;
4886 struct sljit_label *mainloop;
4888 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4889 OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
4890 GET_LOCAL_BASE(TMP1, 0, 0);
4892 /* Drop frames until we reach STACK_TOP. */
4893 mainloop = LABEL();
4894 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
4895 jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
/* Positive header: restore two words, then step over three words. */
4897 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4898 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4899 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
4900 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4901 JUMPTO(SLJIT_JUMP, mainloop);
/* Header is zero or negative. Only the negative case can reach the second
jump, which is why a not-zero test is sufficient here. */
4903 JUMPHERE(jump);
4904 jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
4905 /* End of reverting values. */
4906 OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
4907 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Negative header: restore one word, then step over two words. */
4909 JUMPHERE(jump);
4910 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4911 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4912 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4913 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4914 JUMPTO(SLJIT_JUMP, mainloop);
/* Emits the shared fast-call helper used for word boundary tests.

The return address is stored in LOCALS0. The helper computes a 0/1 "is a
word character" value for the character before STR_PTR (stored in LOCALS1;
0 when STR_PTR is at or before jit_arguments.begin) and for the character at
STR_PTR (kept in TMP2; 0 when check_str_end() reports the end of the
subject). The final XOR of the two values sets the zero flag, so the zero
flag is set when both sides have the same word-ness.

With common->use_ucp the test is: underscore, or a Unicode type in the range
ucp_Ll..ucp_Lu or ucp_Nd..ucp_No (the type is obtained through the
common->getucd helper - presumably returned in TMP1; confirm). Otherwise the
ctype_word bit of common->ctypes is used, and characters above 255 count as
non-word characters. */
4917 static void check_wordboundary(compiler_common *common)
4919 DEFINE_COMPILER;
4920 struct sljit_jump *skipread;
4921 jump_list *skipread_list = NULL;
4922 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4923 struct sljit_jump *jump;
4924 #endif
/* The shift by 4 used below relies on this value. */
4926 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4928 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4929 /* Get type of the previous char, and put it to LOCALS1. */
4930 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4931 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4932 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character: LOCALS1 stays 0. */
4933 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and re-read the previous character; read_char() leaves STR_PTR
after it again. */
4934 skip_char_back(common);
4935 check_start_used_ptr(common);
4936 read_char(common);
4938 /* Testing char type. */
4939 #ifdef SUPPORT_UCP
4940 if (common->use_ucp)
/* TMP2 is preset to 1 for the underscore shortcut. */
4942 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4943 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4944 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks: ucp_Ll..ucp_Lu, then ucp_Nd..ucp_No. */
4945 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4946 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4947 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4948 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4949 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4950 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4951 JUMPHERE(jump);
4952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4954 else
4955 #endif
4957 #ifndef COMPILE_PCRE8
4958 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4959 #elif defined SUPPORT_UTF
4960 /* Here LOCALS1 has already been zeroed. */
4961 jump = NULL;
4962 if (common->utf)
4963 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4964 #endif /* COMPILE_PCRE8 */
/* Table lookup: extract the ctype_word bit as 0 or 1. */
4965 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4966 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4967 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4968 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4969 #ifndef COMPILE_PCRE8
4970 JUMPHERE(jump);
4971 #elif defined SUPPORT_UTF
4972 if (jump != NULL)
4973 JUMPHERE(jump);
4974 #endif /* COMPILE_PCRE8 */
4976 JUMPHERE(skipread);
/* Now the character at STR_PTR; TMP2 defaults to 0 for the end of subject. */
4978 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4979 check_str_end(common, &skipread_list);
4980 peek_char(common, READ_CHAR_MAX);
4982 /* Testing char type. This is a code duplication. */
4983 #ifdef SUPPORT_UCP
4984 if (common->use_ucp)
4986 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4987 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4988 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4989 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4990 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4991 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4992 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4993 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4994 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4995 JUMPHERE(jump);
4997 else
4998 #endif
5000 #ifndef COMPILE_PCRE8
5001 /* TMP2 may be destroyed by peek_char. */
5002 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
5003 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5004 #elif defined SUPPORT_UTF
5005 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
5006 jump = NULL;
5007 if (common->utf)
5008 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5009 #endif
5010 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
5011 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
5012 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5013 #ifndef COMPILE_PCRE8
5014 JUMPHERE(jump);
5015 #elif defined SUPPORT_UTF
5016 if (jump != NULL)
5017 JUMPHERE(jump);
5018 #endif /* COMPILE_PCRE8 */
5020 set_jumps(skipread_list, LABEL());
/* Zero flag set <=> previous and current word-ness are equal. */
5022 OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5023 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Tries to replace a 256-bit class bitmap test by a short sequence of
comparisons on the character held in TMP1.

Arguments:
  common      compiler state
  bits        the 32-byte bitmap (bit i is set when character i is included)
  nclass      controls whether a closing boundary at 256 is recorded: it is
              added when the last run is 0 and nclass is set, or when the
              last run is 1 and nclass is not set
  invert      when set, the sense of the generated test is flipped
  backtracks  list that receives the jumps taken for rejected characters

Returns TRUE when code was generated, FALSE when the bitmap has too many
transitions (more than MAX_RANGE_SIZE while scanning, or more than 4 in
total); in the FALSE case no code is emitted.

The bitmap is first reduced to a list of positions (ranges[]) where the bit
value changes; the switch below then handles 0 to 4 such positions. */
5026 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
5028 /* May destroy TMP1. */
5029 DEFINE_COMPILER;
5030 int ranges[MAX_RANGE_SIZE];
5031 sljit_u8 bit, cbit, all;
5032 int i, byte, length = 0;
5034 bit = bits[0] & 0x1;
5035 /* All bits will be zero or one (since bit is zero or one). */
5036 all = -bit;
/* Scan all 256 positions; whole bytes equal to the current run value are
skipped eight bits at a time. */
5038 for (i = 0; i < 256; )
5040 byte = i >> 3;
5041 if ((i & 0x7) == 0 && bits[byte] == all)
5042 i += 8;
5043 else
5045 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
5046 if (cbit != bit)
/* The bit value changes at position i: record a boundary. */
5048 if (length >= MAX_RANGE_SIZE)
5049 return FALSE;
5050 ranges[length] = i;
5051 length++;
5052 bit = cbit;
5053 all = -cbit;
5055 i++;
5059 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
5061 if (length >= MAX_RANGE_SIZE)
5062 return FALSE;
5063 ranges[length] = 256;
5064 length++;
/* Only up to four boundaries are handled. length starts at 0 and is only
incremented, so the "< 0" part can never be true. */
5067 if (length < 0 || length > 4)
5068 return FALSE;
/* From here on, bit is the (possibly inverted) value of the first run. */
5070 bit = bits[0] & 0x1;
5071 if (invert) bit ^= 0x1;
5073 /* No character is accepted. */
5074 if (length == 0 && bit == 0)
5075 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5077 switch(length)
5079 case 0:
5080 /* When bit != 0, all characters are accepted. */
5081 return TRUE;
/* One boundary: a single threshold comparison. */
5083 case 1:
5084 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5085 return TRUE;
/* Two boundaries: one interval [ranges[0], ranges[1]), tested with a
subtract-and-compare, or with a plain equality test when it holds a single
character. */
5087 case 2:
5088 if (ranges[0] + 1 != ranges[1])
5090 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5091 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5093 else
5094 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5095 return TRUE;
/* Three boundaries: a threshold plus one interval. */
5097 case 3:
5098 if (bit != 0)
5100 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5101 if (ranges[0] + 1 != ranges[1])
5103 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5104 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5106 else
5107 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5108 return TRUE;
5111 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5112 if (ranges[1] + 1 != ranges[2])
5114 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5115 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5117 else
5118 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5119 return TRUE;
/* Four boundaries: two intervals. If both have the same length and their
start positions differ in exactly one bit, that bit is forced on in TMP1 and
only the upper interval is tested. */
5121 case 4:
5122 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5123 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5124 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5125 && is_powerof2(ranges[2] - ranges[0]))
5127 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5128 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5129 if (ranges[2] + 1 != ranges[3])
5131 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5132 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5134 else
5135 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5136 return TRUE;
5139 if (bit != 0)
/* i tracks how much has already been subtracted from TMP1, so that the
second interval test can use adjusted constants. */
5141 i = 0;
5142 if (ranges[0] + 1 != ranges[1])
5144 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5145 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5146 i = ranges[0];
5148 else
5149 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5151 if (ranges[2] + 1 != ranges[3])
5153 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5154 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5156 else
5157 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5158 return TRUE;
/* bit == 0: test the outer span [ranges[0], ranges[3]) first, then the gap
[ranges[1], ranges[2]) inside it. */
5161 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5162 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5163 if (ranges[1] + 1 != ranges[2])
5165 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5166 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5168 else
5169 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5170 return TRUE;
5172 default:
5173 SLJIT_UNREACHABLE();
5174 return FALSE;
/* Emits the shared fast-call helper that tests the character in TMP1 against
the "any newline" set: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide
characters are possible (16/32-bit libraries, or UTF mode in the 8-bit
library).

The result is left in TMP2 (non-zero when the character is a newline) and in
the zero flag set by the final OP_FLAGS. The return address is kept in
RETURN_ADDR. Note that TMP1 is modified as well. */
5178 static void check_anynewline(compiler_common *common)
5180 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5181 DEFINE_COMPILER;
5183 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase to 0x0a so that 0x0a-0x0d becomes one unsigned range check. */
5185 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5186 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5187 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5188 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5189 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5190 #ifdef COMPILE_PCRE8
5191 if (common->utf)
5193 #endif
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one comparison covers
both characters. */
5194 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5195 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5196 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5197 #ifdef COMPILE_PCRE8
5199 #endif
5200 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and publish the result in the zero flag. */
5201 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5202 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the shared fast-call helper that tests the character in TMP1 against
the horizontal space set: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000 when wide characters are possible
(16/32-bit libraries, or UTF mode in the 8-bit library).

The result is left in TMP2 (non-zero when the character is a horizontal
space) and in the zero flag set by the final OP_FLAGS. The return address is
kept in RETURN_ADDR. In the wide character path TMP1 is modified as well. */
5205 static void check_hspace(compiler_common *common)
5207 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
5208 DEFINE_COMPILER;
5210 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5212 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5213 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5214 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5215 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5216 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5217 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5218 #ifdef COMPILE_PCRE8
5219 if (common->utf)
5221 #endif
5222 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5223 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5224 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5225 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5226 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* Rebase to 0x2000; the remaining constants are relative to that value. */
5227 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5228 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5229 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5230 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5231 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5232 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5233 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5234 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5235 #ifdef COMPILE_PCRE8
5237 #endif
5238 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and publish the result in the zero flag. */
5239 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5241 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the shared fast-call helper that tests the character in TMP1 against
the vertical space set: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide
characters are possible (16/32-bit libraries, or UTF mode in the 8-bit
library).

The result is left in TMP2 (non-zero when the character is a vertical space)
and in the zero flag set by the final OP_FLAGS. The return address is kept in
RETURN_ADDR. Note that TMP1 is modified as well. */
5244 static void check_vspace(compiler_common *common)
5246 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
5247 DEFINE_COMPILER;
5249 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase to 0x0a so that 0x0a-0x0d becomes one unsigned range check. */
5251 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5252 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5253 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5254 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5255 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5256 #ifdef COMPILE_PCRE8
5257 if (common->utf)
5259 #endif
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one comparison covers
both characters. */
5260 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5261 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5262 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5263 #ifdef COMPILE_PCRE8
5265 #endif
5266 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and publish the result in the zero flag. */
5267 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5269 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the shared fast-call helper that compares two code unit sequences
exactly (case sensitive).

On entry TMP1 points to the first sequence and TMP2 holds the length in
bytes; STR_PTR is first moved back by TMP2 and then used as the pointer to
the second sequence. The loop stops at the first mismatch or when TMP2
reaches zero, so TMP2 is zero on exit only when no mismatch was found
(assuming the caller passes a non-zero multiple of the code unit size -
confirm against callers). The return address is parked in LOCALS0 and
reloaded into TMP1 for the return.

Two scratch registers hold the loaded units. When TMP3 is not a real machine
register (sljit_get_register_index(TMP3) < 0), STR_END and STACK_TOP are
borrowed instead; their values are saved in TMP3 / RETURN_ADDR and restored
before returning.

Three loop variants are generated depending on which addressing mode
sljit_emit_mem() reports as supported: post-update, pre-update, or plain
loads followed by explicit pointer increments. */
5272 static void do_casefulcmp(compiler_common *common)
5274 DEFINE_COMPILER;
5275 struct sljit_jump *jump;
5276 struct sljit_label *label;
5277 int char1_reg;
5278 int char2_reg;
5280 if (sljit_get_register_index(TMP3) < 0)
5282 char1_reg = STR_END;
5283 char2_reg = STACK_TOP;
5285 else
5287 char1_reg = TMP3;
5288 char2_reg = RETURN_ADDR;
5291 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5292 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers. */
5294 if (char1_reg == STR_END)
5296 OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
5297 OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
/* Variant 1: loads with post-update addressing. The call with
SLJIT_MEM_SUPP is used as a support query here. */
5300 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5302 label = LABEL();
5303 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5304 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5305 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5306 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5307 JUMPTO(SLJIT_NOT_ZERO, label);
5309 JUMPHERE(jump);
5310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Variant 2: loads with pre-update addressing. Both pointers are moved back
by one unit before the loop and STR_PTR is moved forward again afterwards. */
5312 else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5314 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5315 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5317 label = LABEL();
5318 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5319 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5320 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5321 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5322 JUMPTO(SLJIT_NOT_ZERO, label);
5324 JUMPHERE(jump);
5325 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Variant 3: plain loads and explicit increments. */
5328 else
5330 label = LABEL();
5331 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5332 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5333 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5334 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5335 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5336 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5337 JUMPTO(SLJIT_NOT_ZERO, label);
5339 JUMPHERE(jump);
5340 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Restore the borrowed registers. */
5343 if (char1_reg == STR_END)
5345 OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
5346 OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
5349 sljit_emit_fast_return(compiler, TMP1, 0);
/* Emits the shared fast-call helper that compares two code unit sequences
case-insensitively, using the common->lcc table.

On entry TMP1 points to the first sequence and TMP2 holds the length in
bytes; STR_PTR is first moved back by TMP2 and then used as the pointer to
the second sequence. Each loaded unit is replaced by its lcc table entry
before the comparison; in the 16/32-bit libraries units above 255 are
compared unchanged. The loop stops at the first mismatch or when TMP2
reaches zero. The return address is parked in LOCALS0 and reloaded into TMP1
for the return.

Register use: STR_END always holds the unit from the first sequence, so its
value is saved in LOCALS1 and restored at the end. When TMP3 is not a real
machine register (sljit_get_register_index(TMP3) < 0), STACK_TOP and
STACK_LIMIT are borrowed for the second unit and the table address; their
values are saved in TMP3 / RETURN_ADDR and restored before returning.

opt_type selects the load style: 1 = post-update addressing, 2 = pre-update
addressing, 0 = plain loads with explicit increments. */
5352 static void do_caselesscmp(compiler_common *common)
5354 DEFINE_COMPILER;
5355 struct sljit_jump *jump;
5356 struct sljit_label *label;
5357 int char1_reg = STR_END;
5358 int char2_reg;
5359 int lcc_table;
5360 int opt_type = 0;
5362 if (sljit_get_register_index(TMP3) < 0)
5364 char2_reg = STACK_TOP;
5365 lcc_table = STACK_LIMIT;
5367 else
5369 char2_reg = RETURN_ADDR;
5370 lcc_table = TMP3;
/* The calls with SLJIT_MEM_SUPP are used as support queries here. */
5373 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5374 opt_type = 1;
5375 else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5376 opt_type = 2;
5378 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5379 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save STR_END (used as char1_reg). */
5381 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
/* Save the borrowed registers. */
5383 if (char2_reg == STACK_TOP)
5385 OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
5386 OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
5389 OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
5391 if (opt_type == 1)
5393 label = LABEL();
5394 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5395 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5397 else if (opt_type == 2)
/* Pre-update loads need both pointers one unit behind. */
5399 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5400 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5402 label = LABEL();
5403 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5404 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5406 else
/* Plain loads: TMP1 is advanced here, STR_PTR after the table lookups. */
5408 label = LABEL();
5409 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5410 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5411 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
/* Map both units through the lcc table (units above 255 are skipped in the
16/32-bit libraries). */
5414 #ifndef COMPILE_PCRE8
5415 jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
5416 #endif
5417 OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
5418 #ifndef COMPILE_PCRE8
5419 JUMPHERE(jump);
5420 jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
5421 #endif
5422 OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
5423 #ifndef COMPILE_PCRE8
5424 JUMPHERE(jump);
5425 #endif
5427 if (opt_type == 0)
5428 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5430 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5431 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5432 JUMPTO(SLJIT_NOT_ZERO, label);
5434 JUMPHERE(jump);
5435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Undo the initial one-unit step back of the pre-update variant. */
5437 if (opt_type == 2)
5438 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers and STR_END. */
5440 if (char2_reg == STACK_TOP)
5442 OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
5443 OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
5446 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5447 sljit_emit_fast_return(compiler, TMP1, 0);
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings, done in C because generating the
equivalent machine code would not be worthwhile.

Characters are decoded from [src1, end1) and [src2, end2) in parallel. A
pair matches when the characters are equal, when the first one equals the
second one plus its other_case delta, or when the first one is a member of
the second one's caseless set.

Returns:
  NULL             a pair of characters does not match
  (pcre_uchar*)1   the second string ran out before the first one
  otherwise        the position in the second string after the last
                   compared character */
static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
{
sljit_u32 ref_char, subject_char;
const ucd_record *record;
const sljit_u32 *set_entry;

for (; src1 < end1; )
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(ref_char, src1);
  GETCHARINC(subject_char, src2);
  record = GET_UCD(subject_char);

  /* Identical characters or a simple one-to-one case pair. */
  if (ref_char == subject_char || ref_char == subject_char + record->other_case)
    continue;

  /* Otherwise walk the caseless set of the subject character. The walk
  gives up as soon as an entry larger than ref_char is seen (this relies on
  the set being stored in ascending order with a large terminator -
  presumably guaranteed by the table generator). */
  for (set_entry = PRIV(ucd_caseless_sets) + record->caseset; ; set_entry++)
    {
    if (ref_char < *set_entry)
      return NULL;
    if (ref_char == *set_entry)
      break;
    }
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
/* Emits code that compares one pattern character (every code unit of it when
   in UTF mode) against the subject, reading the subject at the negative
   offset -context->length from STR_PTR.

   common     - compiler state (provides the sljit compiler and the utf flag).
   caseless   - TRUE when the character has to be matched caselessly.
   cc         - points to the pattern character to compare.
   context    - running state shared by consecutive calls: remaining length,
                the register that currently receives loads (sourcereg, -1 on
                the first call), and the code units / other-case bits
                collected so far (c, oc, ucharptr).
   backtracks - list that receives the jumps taken on mismatch.

   Returns cc advanced past the code units that were processed. */
5481 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5482 compare_context *context, jump_list **backtracks)
5484 DEFINE_COMPILER;
5485 unsigned int othercasebit = 0;
5486 pcre_uchar *othercasechar = NULL;
5487 #ifdef SUPPORT_UTF
5488 int utflength;
5489 #endif
/* A caseless character whose two cases differ in one bit is matched by
   OR-ing that bit into the subject before comparing. othercasechar is the
   code unit of the character that carries the differing bit. */
5491 if (caseless && char_has_othercase(common, cc))
5493 othercasebit = char_get_othercase_bit(common, cc);
5494 SLJIT_ASSERT(othercasebit);
5495 /* Extracting bit difference info. */
5496 #if defined COMPILE_PCRE8
/* 8 bit: the upper part is the code unit index, the low byte is the bit. */
5497 othercasechar = cc + (othercasebit >> 8);
5498 othercasebit &= 0xff;
5499 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5500 /* Note that this code only handles characters in the BMP. If there
5501 ever are characters outside the BMP whose othercase differs in only one
5502 bit from itself (there currently are none), this code will need to be
5503 revised for COMPILE_PCRE32. */
/* 16/32 bit: bits above 9 are the code unit index; bit 0x100 tells whether
   the low byte value has to be moved into the upper byte of the unit. */
5504 othercasechar = cc + (othercasebit >> 9);
5505 if ((othercasebit & 0x100) != 0)
5506 othercasebit = (othercasebit & 0xff) << 8;
5507 else
5508 othercasebit &= 0xff;
5509 #endif /* COMPILE_PCRE[8|16|32] */
/* First call for this context: load the first chunk into TMP1 and make TMP2
   the register that receives the next load. From here on the two registers
   alternate: one is compared while the other already holds the next chunk. */
5512 if (context->sourcereg == -1)
5514 #if defined COMPILE_PCRE8
5515 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5516 if (context->length >= 4)
5517 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5518 else if (context->length >= 2)
5519 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5520 else
5521 #endif
5522 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5523 #elif defined COMPILE_PCRE16
5524 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5525 if (context->length >= 4)
5526 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5527 else
5528 #endif
5529 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5530 #elif defined COMPILE_PCRE32
5531 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5532 #endif /* COMPILE_PCRE[8|16|32] */
5533 context->sourcereg = TMP2;
/* In UTF mode the code below is repeated for every code unit of the
   character; utflength counts the units still to be processed (see the
   trailing while (utflength > 0)). */
5536 #ifdef SUPPORT_UTF
5537 utflength = 1;
5538 if (common->utf && HAS_EXTRALEN(*cc))
5539 utflength += GET_EXTRALEN(*cc);
5543 #endif
/* One code unit is consumed from the remaining length. */
5545 context->length -= IN_UCHARS(1);
5546 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5548 /* Unaligned read is supported. */
/* Collect the expected unit in context->c and the bit to be OR-ed into the
   subject in context->oc; the comparison is emitted once a whole chunk has
   been collected. */
5549 if (othercasebit != 0 && othercasechar == cc)
5551 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5552 context->oc.asuchars[context->ucharptr] = othercasebit;
5554 else
5556 context->c.asuchars[context->ucharptr] = *cc;
5557 context->oc.asuchars[context->ucharptr] = 0;
5559 context->ucharptr++;
/* Flush when the chunk is full or when no further units will arrive. */
5561 #if defined COMPILE_PCRE8
5562 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5563 #else
5564 if (context->ucharptr >= 2 || context->length == 0)
5565 #endif
/* Load the next chunk (if any is left) into the current load register before
   the previous chunk is compared. */
5567 if (context->length >= 4)
5568 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5569 else if (context->length >= 2)
5570 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5571 #if defined COMPILE_PCRE8
5572 else if (context->length >= 1)
5573 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5574 #endif /* COMPILE_PCRE8 */
/* Swap registers: sourcereg now names the register holding the chunk that
   was loaded earlier and is about to be compared. */
5575 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
/* Compare the collected chunk as a 4 byte, 2 byte or (8 bit only) 1 byte
   value, applying the other-case bits first when there are any. */
5577 switch(context->ucharptr)
5579 case 4 / sizeof(pcre_uchar):
5580 if (context->oc.asint != 0)
5581 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5582 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5583 break;
5585 case 2 / sizeof(pcre_uchar):
5586 if (context->oc.asushort != 0)
5587 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5588 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5589 break;
5591 #ifdef COMPILE_PCRE8
5592 case 1:
5593 if (context->oc.asbyte != 0)
5594 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5595 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5596 break;
5597 #endif
5599 default:
5600 SLJIT_UNREACHABLE();
5601 break;
/* Start collecting a new chunk. */
5603 context->ucharptr = 0;
5606 #else
5608 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit at a time: load the next unit (if any), swap registers, then
   compare the unit loaded previously. */
5609 if (context->length >= 1)
5610 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5612 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5614 if (othercasebit != 0 && othercasechar == cc)
5616 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5617 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5619 else
5620 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5622 #endif
/* Advance to the next code unit of the pattern character. */
5624 cc++;
5625 #ifdef SUPPORT_UTF
5626 utflength--;
5628 while (utflength > 0);
5629 #endif
5631 return cc;
5634 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Helper macros for compile_xclass_matchingpath() (they are #undef-ed after
   that function). The generated code keeps typereg / TMP1 biased by the
   compile-time variables typeoffset / charoffset, which is why comparisons in
   that function use constants of the form (c - charoffset).
   SET_TYPE_OFFSET(value) / SET_CHAR_OFFSET(value) emit a single ADD or SUB
   that changes the bias of the register to (value) - nothing is emitted when
   the bias is already (value) - and then record (value) as the new offset.
   Each macro expands to more than one statement and evaluates (value)
   several times, so the argument must be free of side effects. */
5636 #define SET_TYPE_OFFSET(value) \
5637 if ((value) != typeoffset) \
5639 if ((value) < typeoffset) \
5640 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
5641 else \
5642 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
5644 typeoffset = (value);
5646 #define SET_CHAR_OFFSET(value) \
5647 if ((value) != charoffset) \
5649 if ((value) < charoffset) \
5650 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
5651 else \
5652 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
5654 charoffset = (value);
/* Forward declaration: compile_xclass_matchingpath() calls this function
   (with OP_ALLANY, for the PT_ANY property) before its definition appears. */
5656 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5658 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5660 DEFINE_COMPILER;
5661 jump_list *found = NULL;
5662 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5663 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5664 struct sljit_jump *jump = NULL;
5665 pcre_uchar *ccbegin;
5666 int compares, invertcmp, numberofcmps;
5667 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5668 BOOL utf = common->utf;
5669 #endif
5671 #ifdef SUPPORT_UCP
5672 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5673 BOOL charsaved = FALSE;
5674 int typereg = TMP1;
5675 const sljit_u32 *other_cases;
5676 sljit_uw typeoffset;
5677 #endif
5679 /* Scanning the necessary info. */
5680 cc++;
5681 ccbegin = cc;
5682 compares = 0;
5683 if (cc[-1] & XCL_MAP)
5685 min = 0;
5686 cc += 32 / sizeof(pcre_uchar);
5689 while (*cc != XCL_END)
5691 compares++;
5692 if (*cc == XCL_SINGLE)
5694 cc ++;
5695 GETCHARINCTEST(c, cc);
5696 if (c > max) max = c;
5697 if (c < min) min = c;
5698 #ifdef SUPPORT_UCP
5699 needschar = TRUE;
5700 #endif
5702 else if (*cc == XCL_RANGE)
5704 cc ++;
5705 GETCHARINCTEST(c, cc);
5706 if (c < min) min = c;
5707 GETCHARINCTEST(c, cc);
5708 if (c > max) max = c;
5709 #ifdef SUPPORT_UCP
5710 needschar = TRUE;
5711 #endif
5713 #ifdef SUPPORT_UCP
5714 else
5716 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5717 cc++;
5718 if (*cc == PT_CLIST)
5720 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5721 while (*other_cases != NOTACHAR)
5723 if (*other_cases > max) max = *other_cases;
5724 if (*other_cases < min) min = *other_cases;
5725 other_cases++;
5728 else
5730 max = READ_CHAR_MAX;
5731 min = 0;
5734 switch(*cc)
5736 case PT_ANY:
5737 /* Any either accepts everything or ignored. */
5738 if (cc[-1] == XCL_PROP)
5740 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5741 if (list == backtracks)
5742 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5743 return;
5745 break;
5747 case PT_LAMP:
5748 case PT_GC:
5749 case PT_PC:
5750 case PT_ALNUM:
5751 needstype = TRUE;
5752 break;
5754 case PT_SC:
5755 needsscript = TRUE;
5756 break;
5758 case PT_SPACE:
5759 case PT_PXSPACE:
5760 case PT_WORD:
5761 case PT_PXGRAPH:
5762 case PT_PXPRINT:
5763 case PT_PXPUNCT:
5764 needstype = TRUE;
5765 needschar = TRUE;
5766 break;
5768 case PT_CLIST:
5769 case PT_UCNC:
5770 needschar = TRUE;
5771 break;
5773 default:
5774 SLJIT_UNREACHABLE();
5775 break;
5777 cc += 2;
5779 #endif
5781 SLJIT_ASSERT(compares > 0);
5783 /* We are not necessary in utf mode even in 8 bit mode. */
5784 cc = ccbegin;
5785 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5787 if ((cc[-1] & XCL_HASPROP) == 0)
5789 if ((cc[-1] & XCL_MAP) != 0)
5791 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5792 if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5794 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5795 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5796 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5797 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5798 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5799 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5802 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5803 JUMPHERE(jump);
5805 cc += 32 / sizeof(pcre_uchar);
5807 else
5809 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5810 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5813 else if ((cc[-1] & XCL_MAP) != 0)
5815 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5816 #ifdef SUPPORT_UCP
5817 charsaved = TRUE;
5818 #endif
5819 if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5821 #ifdef COMPILE_PCRE8
5822 jump = NULL;
5823 if (common->utf)
5824 #endif
5825 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5827 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5828 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5829 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5830 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5831 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5832 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5834 #ifdef COMPILE_PCRE8
5835 if (common->utf)
5836 #endif
5837 JUMPHERE(jump);
5840 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5841 cc += 32 / sizeof(pcre_uchar);
5844 #ifdef SUPPORT_UCP
5845 if (needstype || needsscript)
5847 if (needschar && !charsaved)
5848 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5850 #ifdef COMPILE_PCRE32
5851 if (!common->utf)
5853 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5854 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5855 JUMPHERE(jump);
5857 #endif
5859 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5860 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5861 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5862 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5863 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5864 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5865 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5867 /* Before anything else, we deal with scripts. */
5868 if (needsscript)
5870 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5871 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5873 ccbegin = cc;
5875 while (*cc != XCL_END)
5877 if (*cc == XCL_SINGLE)
5879 cc ++;
5880 GETCHARINCTEST(c, cc);
5882 else if (*cc == XCL_RANGE)
5884 cc ++;
5885 GETCHARINCTEST(c, cc);
5886 GETCHARINCTEST(c, cc);
5888 else
5890 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5891 cc++;
5892 if (*cc == PT_SC)
5894 compares--;
5895 invertcmp = (compares == 0 && list != backtracks);
5896 if (cc[-1] == XCL_NOTPROP)
5897 invertcmp ^= 0x1;
5898 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5899 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5901 cc += 2;
5905 cc = ccbegin;
5908 if (needschar)
5910 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5913 if (needstype)
5915 if (!needschar)
5917 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5918 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5920 else
5922 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5923 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5924 typereg = RETURN_ADDR;
5928 #endif
5930 /* Generating code. */
5931 charoffset = 0;
5932 numberofcmps = 0;
5933 #ifdef SUPPORT_UCP
5934 typeoffset = 0;
5935 #endif
5937 while (*cc != XCL_END)
5939 compares--;
5940 invertcmp = (compares == 0 && list != backtracks);
5941 jump = NULL;
5943 if (*cc == XCL_SINGLE)
5945 cc ++;
5946 GETCHARINCTEST(c, cc);
5948 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5950 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5951 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5952 numberofcmps++;
5954 else if (numberofcmps > 0)
5956 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5957 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5958 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5959 numberofcmps = 0;
5961 else
5963 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5964 numberofcmps = 0;
5967 else if (*cc == XCL_RANGE)
5969 cc ++;
5970 GETCHARINCTEST(c, cc);
5971 SET_CHAR_OFFSET(c);
5972 GETCHARINCTEST(c, cc);
5974 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5976 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5977 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5978 numberofcmps++;
5980 else if (numberofcmps > 0)
5982 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5983 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5984 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5985 numberofcmps = 0;
5987 else
5989 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5990 numberofcmps = 0;
5993 #ifdef SUPPORT_UCP
5994 else
5996 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5997 if (*cc == XCL_NOTPROP)
5998 invertcmp ^= 0x1;
5999 cc++;
6000 switch(*cc)
6002 case PT_ANY:
6003 if (!invertcmp)
6004 jump = JUMP(SLJIT_JUMP);
6005 break;
6007 case PT_LAMP:
6008 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
6009 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6010 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
6011 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6012 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
6013 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
6014 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6015 break;
6017 case PT_GC:
6018 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
6019 SET_TYPE_OFFSET(c);
6020 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
6021 break;
6023 case PT_PC:
6024 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
6025 break;
6027 case PT_SC:
6028 compares++;
6029 /* Do nothing. */
6030 break;
6032 case PT_SPACE:
6033 case PT_PXSPACE:
6034 SET_CHAR_OFFSET(9);
6035 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
6036 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6038 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
6039 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6041 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
6042 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6044 SET_TYPE_OFFSET(ucp_Zl);
6045 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
6046 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6047 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6048 break;
6050 case PT_WORD:
6051 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
6052 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6053 /* Fall through. */
6055 case PT_ALNUM:
6056 SET_TYPE_OFFSET(ucp_Ll);
6057 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6058 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6059 SET_TYPE_OFFSET(ucp_Nd);
6060 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6061 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6062 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6063 break;
6065 case PT_CLIST:
6066 other_cases = PRIV(ucd_caseless_sets) + cc[1];
6068 /* At least three characters are required.
6069 Otherwise this case would be handled by the normal code path. */
6070 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
6071 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
6073 /* Optimizing character pairs, if their difference is power of 2. */
6074 if (is_powerof2(other_cases[1] ^ other_cases[0]))
6076 if (charoffset == 0)
6077 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6078 else
6080 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6081 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6083 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
6084 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6085 other_cases += 2;
6087 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
6089 if (charoffset == 0)
6090 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6091 else
6093 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6094 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6096 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
6097 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6099 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
6100 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6102 other_cases += 3;
6104 else
6106 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6107 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6110 while (*other_cases != NOTACHAR)
6112 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6113 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6115 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6116 break;
6118 case PT_UCNC:
6119 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
6120 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6121 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
6122 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6123 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
6124 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6126 SET_CHAR_OFFSET(0xa0);
6127 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
6128 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6129 SET_CHAR_OFFSET(0);
6130 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
6131 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
6132 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6133 break;
6135 case PT_PXGRAPH:
6136 /* C and Z groups are the farthest two groups. */
6137 SET_TYPE_OFFSET(ucp_Ll);
6138 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6139 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6141 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6143 /* In case of ucp_Cf, we overwrite the result. */
6144 SET_CHAR_OFFSET(0x2066);
6145 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6146 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6148 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6149 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6151 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
6152 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6154 JUMPHERE(jump);
6155 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6156 break;
6158 case PT_PXPRINT:
6159 /* C and Z groups are the farthest two groups. */
6160 SET_TYPE_OFFSET(ucp_Ll);
6161 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6162 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6164 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
6165 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
6167 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6169 /* In case of ucp_Cf, we overwrite the result. */
6170 SET_CHAR_OFFSET(0x2066);
6171 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6172 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6174 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6175 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6177 JUMPHERE(jump);
6178 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6179 break;
6181 case PT_PXPUNCT:
6182 SET_TYPE_OFFSET(ucp_Sc);
6183 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
6184 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6186 SET_CHAR_OFFSET(0);
6187 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
6188 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
6190 SET_TYPE_OFFSET(ucp_Pc);
6191 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6192 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6193 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6194 break;
6196 default:
6197 SLJIT_UNREACHABLE();
6198 break;
6200 cc += 2;
6202 #endif
6204 if (jump != NULL)
6205 add_jump(compiler, compares > 0 ? list : backtracks, jump);
6208 if (found != NULL)
6209 set_jumps(found, LABEL());
6212 #undef SET_TYPE_OFFSET
6213 #undef SET_CHAR_OFFSET
6215 #endif
/* Emits the matching code for the simple assertions handled by the switch
   below (OP_SOD, OP_SOM, word boundaries, OP_EODN, OP_EOD, OP_DOLL,
   OP_DOLLM, OP_CIRC, OP_CIRCM) and for OP_REVERSE.

   common     - compiler state.
   type       - the opcode to compile.
   cc         - pattern pointer; only read for OP_REVERSE, where it holds the
                length to move back.
   backtracks - list that receives the jumps taken when the assertion fails.

   Returns cc, or cc + LINK_SIZE for OP_REVERSE. */
6217 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6219 DEFINE_COMPILER;
6220 int length;
6221 struct sljit_jump *jump[4];
6222 #ifdef SUPPORT_UTF
6223 struct sljit_label *label;
6224 #endif /* SUPPORT_UTF */
6226 switch(type)
/* Fails unless STR_PTR equals the "begin" field of the arguments. */
6228 case OP_SOD:
6229 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6231 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6232 return cc;
/* Fails unless STR_PTR equals the "str" field of the arguments. */
6234 case OP_SOM:
6235 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6237 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6238 return cc;
/* The shared word boundary routine is fast-called; its result is taken from
   the zero flag afterwards. */
6240 case OP_NOT_WORD_BOUNDARY:
6241 case OP_WORD_BOUNDARY:
6242 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6243 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6244 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6245 return cc;
6247 case OP_EODN:
6248 /* Requires rather complex checks. */
/* jump[0]: already at the end of the subject, no newline check is needed. */
6249 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Fixed two unit newline: it must be the last two units of the subject. */
6250 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6252 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6253 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6254 if (common->mode == JIT_COMPILE)
6255 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6256 else
/* Partial matching: when only one unit is left and it is the first unit of
   the newline, check_partial() is called before backtracking. */
6258 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6259 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6260 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
6261 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6262 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
6263 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6264 check_partial(common, TRUE);
6265 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6266 JUMPHERE(jump[1]);
6268 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6269 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6270 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
/* Fixed one unit newline: it must be the last unit of the subject. */
6272 else if (common->nltype == NLTYPE_FIXED)
6274 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6275 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6276 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6277 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6279 else
/* Other newline types. A CR is handled first: jump[2] is taken when fewer
   than two units remain, jump[3] when CR NL are the last two units. */
6281 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6282 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6283 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6284 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6285 jump[2] = JUMP(SLJIT_GREATER);
6286 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
6287 /* Equal. */
6288 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6289 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6290 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
/* The first unit is not CR. */
6292 JUMPHERE(jump[1]);
6293 if (common->nltype == NLTYPE_ANYCRLF)
6295 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6296 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6297 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6299 else
/* STR_PTR is saved in LOCALS1 around the character read, so the assertion
   consumes nothing. The character must end exactly at STR_END and must be
   accepted by the shared anynewline routine. */
6301 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6302 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6303 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6304 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6305 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6306 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6307 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6309 JUMPHERE(jump[2]);
6310 JUMPHERE(jump[3]);
6312 JUMPHERE(jump[0]);
6313 check_partial(common, FALSE);
6314 return cc;
/* Fails when any subject unit is left. */
6316 case OP_EOD:
6317 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6318 check_partial(common, FALSE);
6319 return cc;
/* Fails when the noteol argument is set; otherwise behaves like OP_EODN, or
   like OP_EOD when common->endonly is set. */
6321 case OP_DOLL:
6322 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6323 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6324 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6326 if (!common->endonly)
6327 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6328 else
6330 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6331 check_partial(common, FALSE);
6333 return cc;
/* At the end of the subject the assertion succeeds unless noteol is set;
   elsewhere a newline must follow STR_PTR. */
6335 case OP_DOLLM:
6336 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6337 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6338 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6339 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6340 check_partial(common, FALSE);
6341 jump[0] = JUMP(SLJIT_JUMP);
6342 JUMPHERE(jump[1]);
6344 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6346 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6347 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6348 if (common->mode == JIT_COMPILE)
6349 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6350 else
6352 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6353 /* STR_PTR = STR_END - IN_UCHARS(1) */
6354 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6355 check_partial(common, TRUE);
6356 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6357 JUMPHERE(jump[1]);
6360 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6361 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6362 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6364 else
6366 peek_char(common, common->nlmax);
6367 check_newlinechar(common, common->nltype, backtracks, FALSE);
6369 JUMPHERE(jump[0]);
6370 return cc;
/* Fails when STR_PTR is after "begin" or when the notbol argument is set. */
6372 case OP_CIRC:
6373 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6374 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6375 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6376 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6377 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6378 return cc;
/* At "begin" the assertion succeeds unless notbol is set; elsewhere STR_PTR
   must be before STR_END and must be preceded by a newline. */
6380 case OP_CIRCM:
6381 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6382 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6383 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6384 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6385 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6386 jump[0] = JUMP(SLJIT_JUMP);
6387 JUMPHERE(jump[1]);
6389 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6390 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Both units of the newline must lie at or after "begin" (still in TMP1). */
6392 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6393 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6394 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6395 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6396 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6397 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6399 else
/* Step back one character and read it again to test the previous character. */
6401 skip_char_back(common);
6402 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6403 check_newlinechar(common, common->nltype, backtracks, FALSE);
6405 JUMPHERE(jump[0]);
6406 return cc;
/* Moves STR_PTR back by "length" characters and fails when that would pass
   "begin". A zero length emits nothing. */
6408 case OP_REVERSE:
6409 length = GET(cc, 0);
6410 if (length == 0)
6411 return cc + LINK_SIZE;
6412 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6413 #ifdef SUPPORT_UTF
6414 if (common->utf)
/* UTF: characters have variable length, so step back one character at a
   time, counting down in TMP2 and checking against "begin" (TMP3) first. */
6416 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6417 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6418 label = LABEL();
6419 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6420 skip_char_back(common);
6421 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6422 JUMPTO(SLJIT_NOT_ZERO, label);
6424 else
6425 #endif
/* Fixed width units: a single subtraction followed by a bounds check. */
6427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6428 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6429 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6431 check_start_used_ptr(common);
6432 return cc + LINK_SIZE;
/* Every opcode handled above returns from inside the switch. */
6434 SLJIT_UNREACHABLE();
6435 return cc;
6438 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6440 DEFINE_COMPILER;
6441 int length;
6442 unsigned int c, oc, bit;
6443 compare_context context;
6444 struct sljit_jump *jump[3];
6445 jump_list *end_list;
6446 #ifdef SUPPORT_UTF
6447 struct sljit_label *label;
6448 #ifdef SUPPORT_UCP
6449 pcre_uchar propdata[5];
6450 #endif
6451 #endif /* SUPPORT_UTF */
6453 switch(type)
6455 case OP_NOT_DIGIT:
6456 case OP_DIGIT:
6457 /* Digits are usually 0-9, so it is worth to optimize them. */
6458 if (check_str_ptr)
6459 detect_partial_match(common, backtracks);
6460 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6461 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6462 read_char7_type(common, type == OP_NOT_DIGIT);
6463 else
6464 #endif
6465 read_char8_type(common, type == OP_NOT_DIGIT);
6466 /* Flip the starting bit in the negative case. */
6467 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6468 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6469 return cc;
6471 case OP_NOT_WHITESPACE:
6472 case OP_WHITESPACE:
6473 if (check_str_ptr)
6474 detect_partial_match(common, backtracks);
6475 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6476 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6477 read_char7_type(common, type == OP_NOT_WHITESPACE);
6478 else
6479 #endif
6480 read_char8_type(common, type == OP_NOT_WHITESPACE);
6481 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6482 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6483 return cc;
6485 case OP_NOT_WORDCHAR:
6486 case OP_WORDCHAR:
6487 if (check_str_ptr)
6488 detect_partial_match(common, backtracks);
6489 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6490 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6491 read_char7_type(common, type == OP_NOT_WORDCHAR);
6492 else
6493 #endif
6494 read_char8_type(common, type == OP_NOT_WORDCHAR);
6495 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6496 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6497 return cc;
6499 case OP_ANY:
6500 if (check_str_ptr)
6501 detect_partial_match(common, backtracks);
6502 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6503 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6505 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6506 end_list = NULL;
6507 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6508 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6509 else
6510 check_str_end(common, &end_list);
6512 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6513 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6514 set_jumps(end_list, LABEL());
6515 JUMPHERE(jump[0]);
6517 else
6518 check_newlinechar(common, common->nltype, backtracks, TRUE);
6519 return cc;
6521 case OP_ALLANY:
6522 if (check_str_ptr)
6523 detect_partial_match(common, backtracks);
6524 #ifdef SUPPORT_UTF
6525 if (common->utf)
6527 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6528 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6529 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6530 #if defined COMPILE_PCRE8
6531 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6532 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6534 #elif defined COMPILE_PCRE16
6535 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6536 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6537 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6538 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6539 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6540 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6541 #endif
6542 JUMPHERE(jump[0]);
6543 #endif /* COMPILE_PCRE[8|16] */
6544 return cc;
6546 #endif
6547 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6548 return cc;
6550 case OP_ANYBYTE:
6551 if (check_str_ptr)
6552 detect_partial_match(common, backtracks);
6553 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6554 return cc;
6556 #ifdef SUPPORT_UTF
6557 #ifdef SUPPORT_UCP
6558 case OP_NOTPROP:
6559 case OP_PROP:
6560 propdata[0] = XCL_HASPROP;
6561 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6562 propdata[2] = cc[0];
6563 propdata[3] = cc[1];
6564 propdata[4] = XCL_END;
6565 if (check_str_ptr)
6566 detect_partial_match(common, backtracks);
6567 compile_xclass_matchingpath(common, propdata, backtracks);
6568 return cc + 2;
6569 #endif
6570 #endif
6572 case OP_ANYNL:
6573 if (check_str_ptr)
6574 detect_partial_match(common, backtracks);
6575 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6576 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6577 /* We don't need to handle soft partial matching case. */
6578 end_list = NULL;
6579 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6580 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6581 else
6582 check_str_end(common, &end_list);
6583 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6584 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6585 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6586 jump[2] = JUMP(SLJIT_JUMP);
6587 JUMPHERE(jump[0]);
6588 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6589 set_jumps(end_list, LABEL());
6590 JUMPHERE(jump[1]);
6591 JUMPHERE(jump[2]);
6592 return cc;
6594 case OP_NOT_HSPACE:
6595 case OP_HSPACE:
6596 if (check_str_ptr)
6597 detect_partial_match(common, backtracks);
6598 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6599 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6600 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6601 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6602 return cc;
6604 case OP_NOT_VSPACE:
6605 case OP_VSPACE:
6606 if (check_str_ptr)
6607 detect_partial_match(common, backtracks);
6608 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6609 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6610 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6611 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6612 return cc;
6614 #ifdef SUPPORT_UCP
6615 case OP_EXTUNI:
6616 if (check_str_ptr)
6617 detect_partial_match(common, backtracks);
6618 read_char(common);
6619 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6620 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6621 /* Optimize register allocation: use a real register. */
6622 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6623 OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6625 label = LABEL();
6626 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6627 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6628 read_char(common);
6629 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6630 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6631 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6633 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6634 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6635 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6636 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6637 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6638 JUMPTO(SLJIT_NOT_ZERO, label);
6640 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6641 JUMPHERE(jump[0]);
6642 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6644 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6646 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6647 /* Since we successfully read a char above, partial matching must occure. */
6648 check_partial(common, TRUE);
6649 JUMPHERE(jump[0]);
6651 return cc;
6652 #endif
6654 case OP_CHAR:
6655 case OP_CHARI:
6656 length = 1;
6657 #ifdef SUPPORT_UTF
6658 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6659 #endif
6660 if (common->mode == JIT_COMPILE && check_str_ptr
6661 && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6663 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6664 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6666 context.length = IN_UCHARS(length);
6667 context.sourcereg = -1;
6668 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6669 context.ucharptr = 0;
6670 #endif
6671 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6674 if (check_str_ptr)
6675 detect_partial_match(common, backtracks);
6676 #ifdef SUPPORT_UTF
6677 if (common->utf)
6679 GETCHAR(c, cc);
6681 else
6682 #endif
6683 c = *cc;
6685 if (type == OP_CHAR || !char_has_othercase(common, cc))
6687 read_char_range(common, c, c, FALSE);
6688 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6689 return cc + length;
6691 oc = char_othercase(common, c);
6692 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6693 bit = c ^ oc;
6694 if (is_powerof2(bit))
6696 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6697 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6698 return cc + length;
6700 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6701 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6702 JUMPHERE(jump[0]);
6703 return cc + length;
6705 case OP_NOT:
6706 case OP_NOTI:
6707 if (check_str_ptr)
6708 detect_partial_match(common, backtracks);
6709 length = 1;
6710 #ifdef SUPPORT_UTF
6711 if (common->utf)
6713 #ifdef COMPILE_PCRE8
6714 c = *cc;
6715 if (c < 128)
6717 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6718 if (type == OP_NOT || !char_has_othercase(common, cc))
6719 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6720 else
6722 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6723 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6724 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6726 /* Skip the variable-length character. */
6727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6728 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6729 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6730 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6731 JUMPHERE(jump[0]);
6732 return cc + 1;
6734 else
6735 #endif /* COMPILE_PCRE8 */
6737 GETCHARLEN(c, cc, length);
6740 else
6741 #endif /* SUPPORT_UTF */
6742 c = *cc;
6744 if (type == OP_NOT || !char_has_othercase(common, cc))
6746 read_char_range(common, c, c, TRUE);
6747 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6749 else
6751 oc = char_othercase(common, c);
6752 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6753 bit = c ^ oc;
6754 if (is_powerof2(bit))
6756 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6757 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6759 else
6761 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6762 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6765 return cc + length;
6767 case OP_CLASS:
6768 case OP_NCLASS:
6769 if (check_str_ptr)
6770 detect_partial_match(common, backtracks);
6772 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6773 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6774 read_char_range(common, 0, bit, type == OP_NCLASS);
6775 #else
6776 read_char_range(common, 0, 255, type == OP_NCLASS);
6777 #endif
6779 if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6780 return cc + 32 / sizeof(pcre_uchar);
6782 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6783 jump[0] = NULL;
6784 if (common->utf)
6786 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6787 if (type == OP_CLASS)
6789 add_jump(compiler, backtracks, jump[0]);
6790 jump[0] = NULL;
6793 #elif !defined COMPILE_PCRE8
6794 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6795 if (type == OP_CLASS)
6797 add_jump(compiler, backtracks, jump[0]);
6798 jump[0] = NULL;
6800 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6802 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6803 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6804 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6805 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6806 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6807 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6809 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6810 if (jump[0] != NULL)
6811 JUMPHERE(jump[0]);
6812 #endif
6813 return cc + 32 / sizeof(pcre_uchar);
6815 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6816 case OP_XCLASS:
6817 if (check_str_ptr)
6818 detect_partial_match(common, backtracks);
6819 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6820 return cc + GET(cc, 0) - 1;
6821 #endif
6823 SLJIT_UNREACHABLE();
6824 return cc;
6827 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6829 /* This function consumes at least one input character. */
6830 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6831 DEFINE_COMPILER;
6832 pcre_uchar *ccbegin = cc;
6833 compare_context context;
6834 int size;
6836 context.length = 0;
6839 if (cc >= ccend)
6840 break;
6842 if (*cc == OP_CHAR)
6844 size = 1;
6845 #ifdef SUPPORT_UTF
6846 if (common->utf && HAS_EXTRALEN(cc[1]))
6847 size += GET_EXTRALEN(cc[1]);
6848 #endif
6850 else if (*cc == OP_CHARI)
6852 size = 1;
6853 #ifdef SUPPORT_UTF
6854 if (common->utf)
6856 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6857 size = 0;
6858 else if (HAS_EXTRALEN(cc[1]))
6859 size += GET_EXTRALEN(cc[1]);
6861 else
6862 #endif
6863 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6864 size = 0;
6866 else
6867 size = 0;
6869 cc += 1 + size;
6870 context.length += IN_UCHARS(size);
6872 while (size > 0 && context.length <= 128);
6874 cc = ccbegin;
6875 if (context.length > 0)
6877 /* We have a fixed-length byte sequence. */
6878 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6879 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6881 context.sourcereg = -1;
6882 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6883 context.ucharptr = 0;
6884 #endif
6885 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6886 return cc;
6889 /* A non-fixed length character will be checked if length == 0. */
6890 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6893 /* Forward definitions. */
6894 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6895 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
/* Allocates a zero-filled backtrack record of `size` bytes, links it on top
   of parent->top and records `ccstart` in it.  Expects `compiler`, `parent`
   and `backtrack` to be in scope at the expansion site; on allocation
   failure the enclosing function returns `error`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
/* Same as PUSH_BACKTRACK, for use inside functions returning void: on
   allocation failure the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
/* Views the in-scope `backtrack` pointer as a pointer to `type`. */
#define BACKTRACK_AS(type) ((type *)backtrack)
6925 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6927 /* The OVECTOR offset goes to TMP2. */
6928 DEFINE_COMPILER;
6929 int count = GET2(cc, 1 + IMM2_SIZE);
6930 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6931 unsigned int offset;
6932 jump_list *found = NULL;
6934 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6936 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6938 count--;
6939 while (count-- > 0)
6941 offset = GET2(slot, 0) << 1;
6942 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6943 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6944 slot += common->name_entry_size;
6947 offset = GET2(slot, 0) << 1;
6948 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6949 if (backtracks != NULL && !common->jscript_compat)
6950 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6952 set_jumps(found, LABEL());
6955 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6957 DEFINE_COMPILER;
6958 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6959 int offset = 0;
6960 struct sljit_jump *jump = NULL;
6961 struct sljit_jump *partial;
6962 struct sljit_jump *nopartial;
6964 if (ref)
6966 offset = GET2(cc, 1) << 1;
6967 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6968 /* OVECTOR(1) contains the "string begin - 1" constant. */
6969 if (withchecks && !common->jscript_compat)
6970 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6972 else
6973 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6975 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6976 if (common->utf && *cc == OP_REFI)
6978 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
6979 if (ref)
6980 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6981 else
6982 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6984 if (withchecks)
6985 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);
6987 /* No free saved registers so save data on stack. */
6988 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6989 OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
6990 OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
6991 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6992 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6993 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6995 if (common->mode == JIT_COMPILE)
6996 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6997 else
6999 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
7001 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
7003 nopartial = JUMP(SLJIT_NOT_EQUAL);
7004 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
7005 check_partial(common, FALSE);
7006 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7007 JUMPHERE(nopartial);
7010 else
7011 #endif /* SUPPORT_UTF && SUPPORT_UCP */
7013 if (ref)
7014 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
7015 else
7016 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
7018 if (withchecks)
7019 jump = JUMP(SLJIT_ZERO);
7021 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7022 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
7023 if (common->mode == JIT_COMPILE)
7024 add_jump(compiler, backtracks, partial);
7026 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
7027 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7029 if (common->mode != JIT_COMPILE)
7031 nopartial = JUMP(SLJIT_JUMP);
7032 JUMPHERE(partial);
7033 /* TMP2 -= STR_END - STR_PTR */
7034 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
7035 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
7036 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
7037 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
7038 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
7039 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7040 JUMPHERE(partial);
7041 check_partial(common, FALSE);
7042 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7043 JUMPHERE(nopartial);
7047 if (jump != NULL)
7049 if (emptyfail)
7050 add_jump(compiler, backtracks, jump);
7051 else
7052 JUMPHERE(jump);
7056 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7058 DEFINE_COMPILER;
7059 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
7060 backtrack_common *backtrack;
7061 pcre_uchar type;
7062 int offset = 0;
7063 struct sljit_label *label;
7064 struct sljit_jump *zerolength;
7065 struct sljit_jump *jump = NULL;
7066 pcre_uchar *ccbegin = cc;
7067 int min = 0, max = 0;
7068 BOOL minimize;
7070 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
7072 if (ref)
7073 offset = GET2(cc, 1) << 1;
7074 else
7075 cc += IMM2_SIZE;
7076 type = cc[1 + IMM2_SIZE];
7078 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
7079 minimize = (type & 0x1) != 0;
7080 switch(type)
7082 case OP_CRSTAR:
7083 case OP_CRMINSTAR:
7084 min = 0;
7085 max = 0;
7086 cc += 1 + IMM2_SIZE + 1;
7087 break;
7088 case OP_CRPLUS:
7089 case OP_CRMINPLUS:
7090 min = 1;
7091 max = 0;
7092 cc += 1 + IMM2_SIZE + 1;
7093 break;
7094 case OP_CRQUERY:
7095 case OP_CRMINQUERY:
7096 min = 0;
7097 max = 1;
7098 cc += 1 + IMM2_SIZE + 1;
7099 break;
7100 case OP_CRRANGE:
7101 case OP_CRMINRANGE:
7102 min = GET2(cc, 1 + IMM2_SIZE + 1);
7103 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
7104 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
7105 break;
7106 default:
7107 SLJIT_UNREACHABLE();
7108 break;
7111 if (!minimize)
7113 if (min == 0)
7115 allocate_stack(common, 2);
7116 if (ref)
7117 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7118 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7119 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7120 /* Temporary release of STR_PTR. */
7121 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7122 /* Handles both invalid and empty cases. Since the minimum repeat,
7123 is zero the invalid case is basically the same as an empty case. */
7124 if (ref)
7125 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7126 else
7128 compile_dnref_search(common, ccbegin, NULL);
7129 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
7131 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7133 /* Restore if not zero length. */
7134 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7136 else
7138 allocate_stack(common, 1);
7139 if (ref)
7140 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7141 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7142 if (ref)
7144 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7145 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7147 else
7149 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7150 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7151 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
7152 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7156 if (min > 1 || max > 1)
7157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
7159 label = LABEL();
7160 if (!ref)
7161 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
7162 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
7164 if (min > 1 || max > 1)
7166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
7167 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7168 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
7169 if (min > 1)
7170 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
7171 if (max > 1)
7173 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7174 allocate_stack(common, 1);
7175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7176 JUMPTO(SLJIT_JUMP, label);
7177 JUMPHERE(jump);
7181 if (max == 0)
7183 /* Includes min > 1 case as well. */
7184 allocate_stack(common, 1);
7185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7186 JUMPTO(SLJIT_JUMP, label);
7189 JUMPHERE(zerolength);
7190 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7192 count_match(common);
7193 return cc;
7196 allocate_stack(common, ref ? 2 : 3);
7197 if (ref)
7198 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7200 if (type != OP_CRMINSTAR)
7201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7203 if (min == 0)
7205 /* Handles both invalid and empty cases. Since the minimum repeat,
7206 is zero the invalid case is basically the same as an empty case. */
7207 if (ref)
7208 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7209 else
7211 compile_dnref_search(common, ccbegin, NULL);
7212 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7214 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7216 /* Length is non-zero, we can match real repeats. */
7217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7218 jump = JUMP(SLJIT_JUMP);
7220 else
7222 if (ref)
7224 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7225 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7227 else
7229 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
7230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
7231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
7232 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
7236 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
7237 if (max > 0)
7238 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
7240 if (!ref)
7241 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
7242 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
7243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7245 if (min > 1)
7247 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7248 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7250 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
7252 else if (max > 0)
7253 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
7255 if (jump != NULL)
7256 JUMPHERE(jump);
7257 JUMPHERE(zerolength);
7259 count_match(common);
7260 return cc;
7263 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7265 DEFINE_COMPILER;
7266 backtrack_common *backtrack;
7267 recurse_entry *entry = common->entries;
7268 recurse_entry *prev = NULL;
7269 sljit_sw start = GET(cc, 1);
7270 pcre_uchar *start_cc;
7271 BOOL needs_control_head;
7273 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
7275 /* Inlining simple patterns. */
7276 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
7278 start_cc = common->start + start;
7279 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
7280 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
7281 return cc + 1 + LINK_SIZE;
7284 while (entry != NULL)
7286 if (entry->start == start)
7287 break;
7288 prev = entry;
7289 entry = entry->next;
7292 if (entry == NULL)
7294 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
7295 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7296 return NULL;
7297 entry->next = NULL;
7298 entry->entry = NULL;
7299 entry->calls = NULL;
7300 entry->start = start;
7302 if (prev != NULL)
7303 prev->next = entry;
7304 else
7305 common->entries = entry;
7308 if (common->has_set_som && common->mark_ptr != 0)
7310 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7311 allocate_stack(common, 2);
7312 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
7313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7314 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7316 else if (common->has_set_som || common->mark_ptr != 0)
7318 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
7319 allocate_stack(common, 1);
7320 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7323 if (entry->entry == NULL)
7324 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
7325 else
7326 JUMPTO(SLJIT_FAST_CALL, entry->entry);
7327 /* Leave if the match is failed. */
7328 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
7329 return cc + 1 + LINK_SIZE;
7332 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
7334 const pcre_uchar *begin = arguments->begin;
7335 int *offset_vector = arguments->offsets;
7336 int offset_count = arguments->offset_count;
7337 int i;
7339 if (PUBL(callout) == NULL)
7340 return 0;
7342 callout_block->version = 2;
7343 callout_block->callout_data = arguments->callout_data;
7345 /* Offsets in subject. */
7346 callout_block->subject_length = arguments->end - arguments->begin;
7347 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
7348 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
7349 #if defined COMPILE_PCRE8
7350 callout_block->subject = (PCRE_SPTR)begin;
7351 #elif defined COMPILE_PCRE16
7352 callout_block->subject = (PCRE_SPTR16)begin;
7353 #elif defined COMPILE_PCRE32
7354 callout_block->subject = (PCRE_SPTR32)begin;
7355 #endif
7357 /* Convert and copy the JIT offset vector to the offset_vector array. */
7358 callout_block->capture_top = 0;
7359 callout_block->offset_vector = offset_vector;
7360 for (i = 2; i < offset_count; i += 2)
7362 offset_vector[i] = jit_ovector[i] - begin;
7363 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
7364 if (jit_ovector[i] >= begin)
7365 callout_block->capture_top = i;
7368 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
7369 if (offset_count > 0)
7370 offset_vector[0] = -1;
7371 if (offset_count > 1)
7372 offset_vector[1] = -1;
7373 return (*PUBL(callout))(callout_block);
/* Aligning to 8 byte. */
#define CALLOUT_ARG_SIZE \
    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Byte offset of field `arg` inside the callout block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(PUBL(callout_block), arg)
7383 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7385 DEFINE_COMPILER;
7386 backtrack_common *backtrack;
7388 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
7390 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7392 SLJIT_ASSERT(common->capture_last_ptr != 0);
7393 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7394 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7395 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
7396 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
7398 /* These pointer sized fields temporarly stores internal variables. */
7399 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
7400 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
7401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
7403 if (common->mark_ptr != 0)
7404 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
7405 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
7406 OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
7407 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
7409 /* Needed to save important temporary registers. */
7410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
7411 /* SLJIT_R0 = arguments */
7412 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
7413 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
7414 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
7415 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7416 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
7418 /* Check return value. */
7419 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
7420 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
7421 if (common->forced_quit_label == NULL)
7422 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
7423 else
7424 JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->forced_quit_label);
7425 return cc + 2 + 2 * LINK_SIZE;
7428 #undef CALLOUT_ARG_SIZE
7429 #undef CALLOUT_ARG_OFFSET
/* Scans the byte code starting at cc and decides whether
   compile_assert_matchingpath has to reserve a stack slot for STR_PTR.

   The opcodes listed in the first case group are skipped using their entry
   in the PRIV(OP_lengths) table. Reaching OP_KET returns FALSE; any other
   opcode returns TRUE.

   NOTE(review): the skipped opcodes are presumably the ones whose generated
   code leaves STR_PTR unchanged - confirm against the opcode handlers. */
7431 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
7433 while (TRUE)
7435 switch (*cc)
7437 case OP_NOT_WORD_BOUNDARY:
7438 case OP_WORD_BOUNDARY:
7439 case OP_CIRC:
7440 case OP_CIRCM:
7441 case OP_DOLL:
7442 case OP_DOLLM:
7443 case OP_CALLOUT:
7444 case OP_ALT:
/* Step over the item and examine the next opcode. */
7445 cc += PRIV(OP_lengths)[*cc];
7446 break;
7448 case OP_KET:
7449 return FALSE;
7451 default:
7452 return TRUE;
/* Emits the matching path code of an assertion (OP_ASSERT, OP_ASSERT_NOT,
   OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or
   OP_BRAMINZERO.

   common      - compiler state. The fields local_exit, positive_assert,
                 then_trap, quit_label, accept_label, quit,
                 positive_assert_quit and accept are saved on entry and
                 restored before returning (local_exit, quit_label and quit
                 are only modified, and hence only restored, for negative
                 assertions).
   cc          - points to the assertion opcode, or to the OP_BRAZERO /
                 OP_BRAMINZERO in front of it.
   backtrack   - receives framesize and private_data_ptr; its matchingpath
                 label is set for OP_BRAZERO and used as a jump target for
                 OP_BRAMINZERO.
   conditional - when TRUE the failure jumps are collected in
                 backtrack->condfailed instead of
                 backtrack->common.topbacktracks.

   Returns the address after the closing item of the assertion
   (cc + 1 + LINK_SIZE once all alternatives are processed), or NULL when
   the sljit compiler reports an error. */
7457 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
7459 DEFINE_COMPILER;
7460 int framesize;
7461 int extrasize;
7462 BOOL needs_control_head;
7463 int private_data_ptr;
7464 backtrack_common altbacktrack;
7465 pcre_uchar *ccbegin;
7466 pcre_uchar opcode;
7467 pcre_uchar bra = OP_BRA;
7468 jump_list *tmp = NULL;
7469 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
7470 jump_list **found;
7471 /* Saving previous accept variables. */
7472 BOOL save_local_exit = common->local_exit;
7473 BOOL save_positive_assert = common->positive_assert;
7474 then_trap_backtrack *save_then_trap = common->then_trap;
7475 struct sljit_label *save_quit_label = common->quit_label;
7476 struct sljit_label *save_accept_label = common->accept_label;
7477 jump_list *save_quit = common->quit;
7478 jump_list *save_positive_assert_quit = common->positive_assert_quit;
7479 jump_list *save_accept = common->accept;
7480 struct sljit_jump *jump;
7481 struct sljit_jump *brajump = NULL;
7483 /* Assert captures then: the enclosing then_trap is hidden while the
   assertion is compiled and restored from save_then_trap afterwards. */
7484 common->then_trap = NULL;
/* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in bra and skipped. */
7486 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7488 SLJIT_ASSERT(!conditional);
7489 bra = *cc;
7490 cc++;
7492 private_data_ptr = PRIVATE_DATA(cc);
7493 SLJIT_ASSERT(private_data_ptr != 0);
7494 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7495 backtrack->framesize = framesize;
7496 backtrack->private_data_ptr = private_data_ptr;
7497 opcode = *cc;
7498 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* "found" receives the jump taken when an alternative matches: positive
   assertions collect these in the local list tmp (bound at "Assert is
   successful" below), negative assertions send them straight to target. */
7499 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start of the current alternative and cc its end. */
7500 ccbegin = cc;
7501 cc += GET(cc, 1);
7503 if (bra == OP_BRAMINZERO)
7505 /* This is a braminzero backtrack path. */
7506 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7507 free_stack(common, 1);
7508 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
/* framesize < 0: init_frame is not called on this path. extrasize is the
   number of stack words reserved here: one for STR_PTR (dropped for a plain
   OP_BRA assertion when assert_needs_str_ptr_saving returns FALSE) plus one
   for the previous control head when needs_control_head is set. */
7511 if (framesize < 0)
7513 extrasize = 1;
7514 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
7515 extrasize = 0;
7517 if (needs_control_head)
7518 extrasize++;
7520 if (framesize == no_frame)
7521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7523 if (extrasize > 0)
7524 allocate_stack(common, extrasize);
7526 if (needs_control_head)
7527 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7529 if (extrasize > 0)
7530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* The previous control head goes to STACK(1) and the control head is
   cleared for the duration of the assertion. */
7532 if (needs_control_head)
7534 SLJIT_ASSERT(extrasize == 2);
7535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
7536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
/* framesize >= 0: a frame is built with init_frame. The extra words hold
   STR_PTR at STACK(0), the previous private_data_ptr value, and the
   previous control head when needs_control_head is set. */
7539 else
7541 extrasize = needs_control_head ? 3 : 2;
7542 allocate_stack(common, framesize + extrasize);
7544 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7545 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
7546 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7547 if (needs_control_head)
7548 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7549 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7551 if (needs_control_head)
7553 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
7554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
7557 else
7558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
7560 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
7563 memset(&altbacktrack, 0, sizeof(backtrack_common));
7564 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7566 /* Negative assert is stronger than positive assert. */
7567 common->local_exit = TRUE;
7568 common->quit_label = NULL;
7569 common->quit = NULL;
7570 common->positive_assert = FALSE;
7572 else
7573 common->positive_assert = TRUE;
7574 common->positive_assert_quit = NULL;
/* One iteration per alternative of the assertion. */
7576 while (1)
7578 common->accept_label = NULL;
7579 common->accept = NULL;
7580 altbacktrack.top = NULL;
7581 altbacktrack.topbacktracks = NULL;
/* Later alternatives restart from the STR_PTR saved at STACK(0). */
7583 if (*ccbegin == OP_ALT && extrasize > 0)
7584 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7586 altbacktrack.cc = ccbegin;
7587 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
/* On a compiler error restore the saved state and give up. */
7588 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7590 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7592 common->local_exit = save_local_exit;
7593 common->quit_label = save_quit_label;
7594 common->quit = save_quit;
7596 common->positive_assert = save_positive_assert;
7597 common->then_trap = save_then_trap;
7598 common->accept_label = save_accept_label;
7599 common->positive_assert_quit = save_positive_assert_quit;
7600 common->accept = save_accept;
7601 return NULL;
/* The alternative matched: accept jumps recorded while compiling it are
   bound to this point. */
7603 common->accept_label = LABEL();
7604 if (common->accept != NULL)
7605 set_jumps(common->accept, common->accept_label);
7607 /* Reset stack. */
7608 if (framesize < 0)
7610 if (framesize == no_frame)
7611 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7612 else if (extrasize > 0)
7613 free_stack(common, extrasize);
7615 if (needs_control_head)
7616 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7618 else
7620 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
7622 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7623 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
7624 if (needs_control_head)
7625 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7627 else
7629 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7630 if (needs_control_head)
7631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
7632 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
/* For a negative assertion a matching alternative means the assertion
   failed; restore STR_PTR / private_data_ptr as required by the bracket
   type before jumping to "found". */
7636 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7638 /* We know that STR_PTR was stored on the top of the stack. */
7639 if (conditional)
7641 if (extrasize > 0)
7642 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
7644 else if (bra == OP_BRAZERO)
7646 if (framesize < 0)
7647 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
7648 else
7650 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
7651 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
7652 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
7654 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7657 else if (framesize >= 0)
7659 /* For OP_BRA and OP_BRAMINZERO. */
7660 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
7663 add_jump(compiler, found, JUMP(SLJIT_JUMP));
/* Backtracking code of this alternative; its top level backtrack jumps are
   bound to the point where the next alternative (or the "none matched"
   code) starts. */
7665 compile_backtrackingpath(common, altbacktrack.top);
7666 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7668 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7670 common->local_exit = save_local_exit;
7671 common->quit_label = save_quit_label;
7672 common->quit = save_quit;
7674 common->positive_assert = save_positive_assert;
7675 common->then_trap = save_then_trap;
7676 common->accept_label = save_accept_label;
7677 common->positive_assert_quit = save_positive_assert_quit;
7678 common->accept = save_accept;
7679 return NULL;
7681 set_jumps(altbacktrack.topbacktracks, LABEL());
7683 if (*cc != OP_ALT)
7684 break;
7686 ccbegin = cc;
7687 cc += GET(cc, 1);
7690 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7692 SLJIT_ASSERT(common->positive_assert_quit == NULL);
7693 /* Makes the check less complicated below. */
7694 common->positive_assert_quit = common->quit;
7697 /* None of them matched. */
/* Jumps queued in positive_assert_quit land here; the stack is brought back
   to the layout that the normal fall-through path has at this point. */
7698 if (common->positive_assert_quit != NULL)
7700 jump = JUMP(SLJIT_JUMP);
7701 set_jumps(common->positive_assert_quit, LABEL());
7702 SLJIT_ASSERT(framesize != no_stack);
7703 if (framesize < 0)
7704 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
7705 else
7707 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7708 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7709 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
7711 JUMPHERE(jump);
/* Restore the control head saved at STACK(1). */
7714 if (needs_control_head)
7715 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
7717 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
7719 /* Assert is failed. */
7720 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
7721 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7723 if (framesize < 0)
7725 /* The topmost item should be 0. */
7726 if (bra == OP_BRAZERO)
7728 if (extrasize == 2)
7729 free_stack(common, 1);
7730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7732 else if (extrasize > 0)
7733 free_stack(common, extrasize);
7735 else
7737 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
7738 /* The topmost item should be 0. */
7739 if (bra == OP_BRAZERO)
7741 free_stack(common, framesize + extrasize - 1);
7742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7744 else
7745 free_stack(common, framesize + extrasize);
7746 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
/* The failure jump goes to target, except for OP_BRAZERO where it is bound
   to backtrack->matchingpath further below. */
7748 jump = JUMP(SLJIT_JUMP);
7749 if (bra != OP_BRAZERO)
7750 add_jump(compiler, target, jump);
7752 /* Assert is successful. */
7753 set_jumps(tmp, LABEL());
7754 if (framesize < 0)
7756 /* We know that STR_PTR was stored on the top of the stack. */
7757 if (extrasize > 0)
7758 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
7760 /* Keep the STR_PTR on the top of the stack. */
7761 if (bra == OP_BRAZERO)
7763 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7764 if (extrasize == 2)
7765 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7767 else if (bra == OP_BRAMINZERO)
7769 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
7770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7773 else
7775 if (bra == OP_BRA)
7777 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7778 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
7779 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
7781 else
7783 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7784 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
7785 if (extrasize == 2)
7787 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7788 if (bra == OP_BRAMINZERO)
7789 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7791 else
7793 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
7794 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
/* OP_BRAZERO: the failure jump taken above continues at the matching path.
   OP_BRAMINZERO: jump to the existing matchingpath label and bind brajump
   (emitted at the top of this function) here. */
7799 if (bra == OP_BRAZERO)
7801 backtrack->matchingpath = LABEL();
7802 SET_LABEL(jump, backtrack->matchingpath);
7804 else if (bra == OP_BRAMINZERO)
7806 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
7807 JUMPHERE(brajump);
7808 if (framesize >= 0)
7810 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7811 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7812 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
7814 set_jumps(backtrack->common.topbacktracks, LABEL());
7817 else
7819 /* AssertNot is successful. */
7820 if (framesize < 0)
7822 if (extrasize > 0)
7823 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7825 if (bra != OP_BRA)
7827 if (extrasize == 2)
7828 free_stack(common, 1);
7829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7831 else if (extrasize > 0)
7832 free_stack(common, extrasize);
7834 else
7836 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7837 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
7838 /* The topmost item should be 0. */
7839 if (bra != OP_BRA)
7841 free_stack(common, framesize + extrasize - 1);
7842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7844 else
7845 free_stack(common, framesize + extrasize);
7846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
7849 if (bra == OP_BRAZERO)
7850 backtrack->matchingpath = LABEL();
7851 else if (bra == OP_BRAMINZERO)
7853 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
7854 JUMPHERE(brajump);
/* With a BRAZERO / BRAMINZERO prefix the jumps collected in topbacktracks
   are bound here and the list is emptied. */
7857 if (bra != OP_BRA)
7859 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
7860 set_jumps(backtrack->common.topbacktracks, LABEL());
7861 backtrack->common.topbacktracks = NULL;
/* Restore the compiler state saved on entry. */
7865 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
7867 common->local_exit = save_local_exit;
7868 common->quit_label = save_quit_label;
7869 common->quit = save_quit;
7871 common->positive_assert = save_positive_assert;
7872 common->then_trap = save_then_trap;
7873 common->accept_label = save_accept_label;
7874 common->positive_assert_quit = save_positive_assert_quit;
7875 common->accept = save_accept;
7876 return cc + 1 + LINK_SIZE;
/* Emits the stack clean-up code that runs after the body of an OP_ONCE
   bracket has matched (compile_bracket_matchingpath calls it when opcode is
   OP_ONCE).

   common             - compiler state.
   ket                - closing opcode of the bracket (OP_KET, OP_KETRMAX or
                        OP_KETRMIN).
   framesize          - frame size of the bracket; negative values select the
                        first branch below, where no frame size enters the
                        stack arithmetic.
   private_data_ptr   - offset of the bracket's private data slot.
   has_alternatives   - TRUE when the bracket has alternatives.
   needs_control_head - TRUE when a saved control head has to be reloaded;
                        it is read from the stack into TMP1 and written back
                        to common->control_head_ptr at the end.

   For OP_KETRMAX the generated code also loads a stack word into TMP2,
   which the caller's OP_KETRMAX handling uses afterwards. */
7879 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
7881 DEFINE_COMPILER;
7882 int stacksize;
7884 if (framesize < 0)
/* no_frame: STACK_TOP is simply reloaded from the private data slot.
   Otherwise the words to release are counted: one for the control head and
   one more unless this is a plain OP_KET bracket without alternatives. */
7886 if (framesize == no_frame)
7887 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7888 else
7890 stacksize = needs_control_head ? 1 : 0;
7891 if (ket != OP_KET || has_alternatives)
7892 stacksize++;
7894 if (stacksize > 0)
7895 free_stack(common, stacksize);
/* The control head sits one word further away when the extra word
   counted above is present. */
7898 if (needs_control_head)
7899 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
7901 /* TMP2, which is set here, is used by OP_KETRMAX below. */
7902 if (ket == OP_KETRMAX)
7903 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7904 else if (ket == OP_KETRMIN)
7906 /* Move the STR_PTR to the private_data_ptr. */
7907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
7910 else
/* A frame exists: STACK_TOP is computed from the private data slot minus
   the frame size plus one or two extra words. */
7912 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
7913 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
7914 if (needs_control_head)
7915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
7917 if (ket == OP_KETRMAX)
7919 /* TMP2, which is set here, is used by OP_KETRMAX below. */
7920 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Write the reloaded control head back. */
7923 if (needs_control_head)
7924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
/* Emits the code that records a finished capture and saves the values it
   overwrites on the stack.

   common           - compiler state.
   stacksize        - index of the first stack slot to use; this function
                      does not call allocate_stack, so the slots must already
                      have been reserved by the caller.
   offset           - index of the start entry of the capture in the offset
                      vector; offset >> 1 is used as the capture number.
   private_data_ptr - private data slot whose value becomes the new
                      OVECTOR(offset).

   Returns stacksize advanced by the number of slots written: one when
   common->capture_last_ptr is in use, plus two when
   common->optimized_cbracket[offset >> 1] is zero. */
7927 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
7929 DEFINE_COMPILER;
/* Save the previous "capture last" value and replace it with this capture
   number. */
7931 if (common->capture_last_ptr != 0)
7933 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7934 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7935 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7936 stacksize++;
/* Save the previous OVECTOR(offset) / OVECTOR(offset + 1) pair, then store
   the new pair: the value held in the private data slot and the current
   STR_PTR. */
7938 if (common->optimized_cbracket[offset >> 1] == 0)
7940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7941 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7942 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7943 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7945 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7946 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7947 stacksize += 2;
7949 return stacksize;
7953 Handling bracketed expressions is probably the most complex part.
7955 Stack layout naming characters:
7956 S - Push the current STR_PTR
7957 0 - Push a 0 (NULL)
7958 A - Push the current STR_PTR. Needed for restoring the STR_PTR
7959 before the next alternative. Not pushed if there are no alternatives.
7960 M - Any values pushed by the current alternative. Can be empty, or anything.
7961 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7962 L - Push the previous local (pointed by localptr) to the stack
7963 () - optional values stored on the stack
7964 ()* - optional, can be stored multiple times
7966 The following list shows the regular expression templates, their PCRE byte codes
7967 and stack layout supported by pcre-sljit.
7969 (?:) OP_BRA | OP_KET A M
7970 () OP_CBRA | OP_KET C M
7971 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
7972 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
7973 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
7974 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
7975 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
7976 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
7977 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
7978 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
7979 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
7980 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
7981 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
7982 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
7983 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
7984 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
7985 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
7986 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
7987 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
7988 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
7989 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
7990 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
7993 Stack layout naming characters:
7994 A - Push the alternative index (starting from 0) on the stack.
7995 Not pushed if there are no alternatives.
7996 M - Any values pushed by the current alternative. Can be empty, or anything.
7998 The next list shows the possible content of a bracket:
7999 (|) OP_*BRA | OP_ALT ... M A
8000 (?()|) OP_*COND | OP_ALT M A
8001 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
8002 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
8003 Or nothing, if trace is unnecessary
8006 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8008 DEFINE_COMPILER;
8009 backtrack_common *backtrack;
8010 pcre_uchar opcode;
8011 int private_data_ptr = 0;
8012 int offset = 0;
8013 int i, stacksize;
8014 int repeat_ptr = 0, repeat_length = 0;
8015 int repeat_type = 0, repeat_count = 0;
8016 pcre_uchar *ccbegin;
8017 pcre_uchar *matchingpath;
8018 pcre_uchar *slot;
8019 pcre_uchar bra = OP_BRA;
8020 pcre_uchar ket;
8021 assert_backtrack *assert;
8022 BOOL has_alternatives;
8023 BOOL needs_control_head = FALSE;
8024 struct sljit_jump *jump;
8025 struct sljit_jump *skip;
8026 struct sljit_label *rmax_label = NULL;
8027 struct sljit_jump *braminzero = NULL;
8029 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
8031 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8033 bra = *cc;
8034 cc++;
8035 opcode = *cc;
8038 opcode = *cc;
8039 ccbegin = cc;
8040 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
8041 ket = *matchingpath;
8042 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
8044 repeat_ptr = PRIVATE_DATA(matchingpath);
8045 repeat_length = PRIVATE_DATA(matchingpath + 1);
8046 repeat_type = PRIVATE_DATA(matchingpath + 2);
8047 repeat_count = PRIVATE_DATA(matchingpath + 3);
8048 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
8049 if (repeat_type == OP_UPTO)
8050 ket = OP_KETRMAX;
8051 if (repeat_type == OP_MINUPTO)
8052 ket = OP_KETRMIN;
8055 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
8057 /* Drop this bracket_backtrack. */
8058 parent->top = backtrack->prev;
8059 return matchingpath + 1 + LINK_SIZE + repeat_length;
8062 matchingpath = ccbegin + 1 + LINK_SIZE;
8063 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
8064 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
8065 cc += GET(cc, 1);
8067 has_alternatives = *cc == OP_ALT;
8068 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
8069 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
8071 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8072 opcode = OP_SCOND;
8073 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8074 opcode = OP_ONCE;
8076 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8078 /* Capturing brackets has a pre-allocated space. */
8079 offset = GET2(ccbegin, 1 + LINK_SIZE);
8080 if (common->optimized_cbracket[offset] == 0)
8082 private_data_ptr = OVECTOR_PRIV(offset);
8083 offset <<= 1;
8085 else
8087 offset <<= 1;
8088 private_data_ptr = OVECTOR(offset);
8090 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
8091 matchingpath += IMM2_SIZE;
8093 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
8095 /* Other brackets simply allocate the next entry. */
8096 private_data_ptr = PRIVATE_DATA(ccbegin);
8097 SLJIT_ASSERT(private_data_ptr != 0);
8098 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
8099 if (opcode == OP_ONCE)
8100 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
8103 /* Instructions before the first alternative. */
8104 stacksize = 0;
8105 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
8106 stacksize++;
8107 if (bra == OP_BRAZERO)
8108 stacksize++;
8110 if (stacksize > 0)
8111 allocate_stack(common, stacksize);
8113 stacksize = 0;
8114 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
8116 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8117 stacksize++;
8120 if (bra == OP_BRAZERO)
8121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8123 if (bra == OP_BRAMINZERO)
8125 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
8126 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8127 if (ket != OP_KETRMIN)
8129 free_stack(common, 1);
8130 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8132 else
8134 if (opcode == OP_ONCE || opcode >= OP_SBRA)
8136 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8137 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8138 /* Nothing stored during the first run. */
8139 skip = JUMP(SLJIT_JUMP);
8140 JUMPHERE(jump);
8141 /* Checking zero-length iteration. */
8142 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8144 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
8145 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8147 else
8149 /* Except when the whole stack frame must be saved. */
8150 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8151 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
8153 JUMPHERE(skip);
8155 else
8157 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8158 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8159 JUMPHERE(jump);
8164 if (repeat_type != 0)
8166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
8167 if (repeat_type == OP_EXACT)
8168 rmax_label = LABEL();
8171 if (ket == OP_KETRMIN)
8172 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8174 if (ket == OP_KETRMAX)
8176 rmax_label = LABEL();
8177 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
8178 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
8181 /* Handling capturing brackets and alternatives. */
8182 if (opcode == OP_ONCE)
8184 stacksize = 0;
8185 if (needs_control_head)
8187 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8188 stacksize++;
8191 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
8193 /* Neither capturing brackets nor recursions are found in the block. */
8194 if (ket == OP_KETRMIN)
8196 stacksize += 2;
8197 if (!needs_control_head)
8198 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8200 else
8202 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8203 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8204 if (ket == OP_KETRMAX || has_alternatives)
8205 stacksize++;
8208 if (stacksize > 0)
8209 allocate_stack(common, stacksize);
8211 stacksize = 0;
8212 if (needs_control_head)
8214 stacksize++;
8215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8218 if (ket == OP_KETRMIN)
8220 if (needs_control_head)
8221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8222 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8223 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
8224 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
8225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
8227 else if (ket == OP_KETRMAX || has_alternatives)
8228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8230 else
8232 if (ket != OP_KET || has_alternatives)
8233 stacksize++;
8235 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
8236 allocate_stack(common, stacksize);
8238 if (needs_control_head)
8239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8241 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8242 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8244 stacksize = needs_control_head ? 1 : 0;
8245 if (ket != OP_KET || has_alternatives)
8247 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8249 stacksize++;
8250 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8252 else
8254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
8257 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
8260 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
8262 /* Saving the previous values. */
8263 if (common->optimized_cbracket[offset >> 1] != 0)
8265 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
8266 allocate_stack(common, 2);
8267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8268 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
8269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8273 else
8275 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8276 allocate_stack(common, 1);
8277 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8278 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8281 else if (opcode == OP_SBRA || opcode == OP_SCOND)
8283 /* Saving the previous value. */
8284 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8285 allocate_stack(common, 1);
8286 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
8287 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8289 else if (has_alternatives)
8291 /* Pushing the starting string pointer. */
8292 allocate_stack(common, 1);
8293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8296 /* Generating code for the first alternative. */
8297 if (opcode == OP_COND || opcode == OP_SCOND)
8299 if (*matchingpath == OP_CREF)
8301 SLJIT_ASSERT(has_alternatives);
8302 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
8303 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8304 matchingpath += 1 + IMM2_SIZE;
8306 else if (*matchingpath == OP_DNCREF)
8308 SLJIT_ASSERT(has_alternatives);
8310 i = GET2(matchingpath, 1 + IMM2_SIZE);
8311 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8312 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8313 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
8314 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8315 slot += common->name_entry_size;
8316 i--;
8317 while (i-- > 0)
8319 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
8320 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
8321 slot += common->name_entry_size;
8323 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8324 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
8325 matchingpath += 1 + 2 * IMM2_SIZE;
8327 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
8329 /* Never has other case. */
8330 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
8331 SLJIT_ASSERT(!has_alternatives);
8333 if (*matchingpath == OP_FAIL)
8334 stacksize = 0;
8335 else if (*matchingpath == OP_RREF)
8337 stacksize = GET2(matchingpath, 1);
8338 if (common->currententry == NULL)
8339 stacksize = 0;
8340 else if (stacksize == RREF_ANY)
8341 stacksize = 1;
8342 else if (common->currententry->start == 0)
8343 stacksize = stacksize == 0;
8344 else
8345 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8347 if (stacksize != 0)
8348 matchingpath += 1 + IMM2_SIZE;
8350 else
8352 if (common->currententry == NULL || common->currententry->start == 0)
8353 stacksize = 0;
8354 else
8356 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
8357 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
8358 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
8359 while (stacksize > 0)
8361 if ((int)GET2(slot, 0) == i)
8362 break;
8363 slot += common->name_entry_size;
8364 stacksize--;
8368 if (stacksize != 0)
8369 matchingpath += 1 + 2 * IMM2_SIZE;
8372 /* The stacksize == 0 is a common "else" case. */
8373 if (stacksize == 0)
8375 if (*cc == OP_ALT)
8377 matchingpath = cc + 1 + LINK_SIZE;
8378 cc += GET(cc, 1);
8380 else
8381 matchingpath = cc;
8384 else
8386 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
8387 /* Similar code as PUSH_BACKTRACK macro. */
8388 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
8389 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8390 return NULL;
8391 memset(assert, 0, sizeof(assert_backtrack));
8392 assert->common.cc = matchingpath;
8393 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
8394 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
8398 compile_matchingpath(common, matchingpath, cc, backtrack);
8399 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8400 return NULL;
8402 if (opcode == OP_ONCE)
8403 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
8405 stacksize = 0;
8406 if (repeat_type == OP_MINUPTO)
8408 /* We need to preserve the counter. TMP2 will be used below. */
8409 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8410 stacksize++;
8412 if (ket != OP_KET || bra != OP_BRA)
8413 stacksize++;
8414 if (offset != 0)
8416 if (common->capture_last_ptr != 0)
8417 stacksize++;
8418 if (common->optimized_cbracket[offset >> 1] == 0)
8419 stacksize += 2;
8421 if (has_alternatives && opcode != OP_ONCE)
8422 stacksize++;
8424 if (stacksize > 0)
8425 allocate_stack(common, stacksize);
8427 stacksize = 0;
8428 if (repeat_type == OP_MINUPTO)
8430 /* TMP2 was set above. */
8431 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
8432 stacksize++;
8435 if (ket != OP_KET || bra != OP_BRA)
8437 if (ket != OP_KET)
8438 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
8439 else
8440 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8441 stacksize++;
8444 if (offset != 0)
8445 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
8447 if (has_alternatives)
8449 if (opcode != OP_ONCE)
8450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
8451 if (ket != OP_KETRMAX)
8452 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8455 /* Must be after the matchingpath label. */
8456 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
8458 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
8459 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8462 if (ket == OP_KETRMAX)
8464 if (repeat_type != 0)
8466 if (has_alternatives)
8467 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8468 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8469 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8470 /* Drop STR_PTR for greedy plus quantifier. */
8471 if (opcode != OP_ONCE)
8472 free_stack(common, 1);
8474 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
8476 if (has_alternatives)
8477 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
8478 /* Checking zero-length iteration. */
8479 if (opcode != OP_ONCE)
8481 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
8482 /* Drop STR_PTR for greedy plus quantifier. */
8483 if (bra != OP_BRAZERO)
8484 free_stack(common, 1);
8486 else
8487 /* TMP2 must contain the starting STR_PTR. */
8488 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
8490 else
8491 JUMPTO(SLJIT_JUMP, rmax_label);
8492 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
8495 if (repeat_type == OP_EXACT)
8497 count_match(common);
8498 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8499 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
8501 else if (repeat_type == OP_UPTO)
8503 /* We need to preserve the counter. */
8504 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
8505 allocate_stack(common, 1);
8506 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8509 if (bra == OP_BRAZERO)
8510 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
8512 if (bra == OP_BRAMINZERO)
8514 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8515 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
8516 if (braminzero != NULL)
8518 JUMPHERE(braminzero);
8519 /* We need to release the end pointer to perform the
8520 backtrack for the zero-length iteration. When
8521 framesize is < 0, OP_ONCE will do the release itself. */
8522 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
8524 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8525 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8527 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
8528 free_stack(common, 1);
8530 /* Continue to the normal backtrack. */
8533 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
8534 count_match(common);
8536 /* Skip the other alternatives. */
8537 while (*cc == OP_ALT)
8538 cc += GET(cc, 1);
8539 cc += 1 + LINK_SIZE;
8541 if (opcode == OP_ONCE)
8543 /* We temporarily encode the needs_control_head in the lowest bit.
8544 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8545 the same value for small signed numbers (including negative numbers). */
8546 BACKTRACK_AS(bracket_backtrack)->u.framesize = ((unsigned int)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
8548 return cc + repeat_length;
/* Emits the matching path of a repeated bracket from the OP_BRAPOS family
   (OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
   OP_BRAPOSZERO.

   common  compiler state
   cc      points to OP_BRAPOSZERO or directly to the bracket opcode
   parent  backtrack whose list receives the new bracketpos_backtrack

   Every alternative is compiled in turn; after a successful alternative the
   generated code jumps back to the 'loop' label to try another iteration.

   Returns the address following the closing OP_KETRPOS, or NULL when the
   sljit compiler reports an error. */
8551 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8553 DEFINE_COMPILER;
8554 backtrack_common *backtrack;
8555 pcre_uchar opcode;
8556 int private_data_ptr;
8557 int cbraprivptr = 0;
8558 BOOL needs_control_head;
8559 int framesize;
8560 int stacksize;
8561 int offset = 0;
8562 BOOL zero = FALSE;
8563 pcre_uchar *ccbegin = NULL;
8564 int stack; /* Also contains the offset of control head. */
8565 struct sljit_label *loop = NULL;
8566 struct jump_list *emptymatch = NULL;
8568 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* A leading OP_BRAPOSZERO is remembered in 'zero' and skipped. */
8569 if (*cc == OP_BRAPOSZERO)
8571 zero = TRUE;
8572 cc++;
8575 opcode = *cc;
8576 private_data_ptr = PRIVATE_DATA(cc);
8577 SLJIT_ASSERT(private_data_ptr != 0);
8578 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* ccbegin is set to the first opcode inside the bracket. For the capturing
   forms 'offset' becomes twice the capture number read from the pattern. */
8579 switch(opcode)
8581 case OP_BRAPOS:
8582 case OP_SBRAPOS:
8583 ccbegin = cc + 1 + LINK_SIZE;
8584 break;
8586 case OP_CBRAPOS:
8587 case OP_SCBRAPOS:
8588 offset = GET2(cc, 1 + LINK_SIZE);
8589 /* This case cannot be optimized in the same way as
8590 normal capturing brackets. */
8591 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
8592 cbraprivptr = OVECTOR_PRIV(offset);
8593 offset <<= 1;
8594 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
8595 break;
8597 default:
8598 SLJIT_UNREACHABLE();
8599 break;
/* A negative framesize selects the layout without init_frame(); otherwise
   a frame is built with init_frame() in the else branch further below. */
8602 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
8603 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
8604 if (framesize < 0)
/* Count the stack slots first, then allocate and fill them. */
8606 if (offset != 0)
8608 stacksize = 2;
8609 if (common->capture_last_ptr != 0)
8610 stacksize++;
8612 else
8613 stacksize = 1;
8615 if (needs_control_head)
8616 stacksize++;
8617 if (!zero)
8618 stacksize++;
8620 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8621 allocate_stack(common, stacksize);
8622 if (framesize == no_frame)
8623 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8625 stack = 0;
8626 if (offset != 0)
/* Capturing form: the current OVECTOR pair (and capture_last, when it is
   tracked) is stored on the stack. */
8628 stack = 2;
8629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8630 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8632 if (common->capture_last_ptr != 0)
8633 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8634 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
8635 if (needs_control_head)
8636 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8637 if (common->capture_last_ptr != 0)
8639 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
8640 stack = 3;
8643 else
/* Non-capturing form: only STR_PTR is stored. */
8645 if (needs_control_head)
8646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8647 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8648 stack = 1;
/* When !zero, the constant 1 is stored one slot above the control head slot
   (if any); 'stack' is left pointing at the control head slot. */
8651 if (needs_control_head)
8652 stack++;
8653 if (!zero)
8654 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
8655 if (needs_control_head)
8657 stack--;
8658 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8661 else
/* Frame layout: the slot order is [flag if !zero], [control head],
   [STR_PTR if non-capturing], previous private data value, then the frame
   written by init_frame(). */
8663 stacksize = framesize + 1;
8664 if (!zero)
8665 stacksize++;
8666 if (needs_control_head)
8667 stacksize++;
8668 if (offset == 0)
8669 stacksize++;
8670 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
8672 allocate_stack(common, stacksize);
8673 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8674 if (needs_control_head)
8675 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8676 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8678 stack = 0;
8679 if (!zero)
8681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
8682 stack = 1;
8684 if (needs_control_head)
8686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
8687 stack++;
8689 if (offset == 0)
8691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
8692 stack++;
8694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
8695 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
8696 stack -= 1 + (offset == 0);
8699 if (offset != 0)
8700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
/* Target of the jump emitted after every successful alternative. */
8702 loop = LABEL();
/* One pass per alternative; the pass that ends at OP_KETRPOS leaves the
   loop through the break near its end. */
8703 while (*cc != OP_KETRPOS)
8705 backtrack->top = NULL;
8706 backtrack->topbacktracks = NULL;
8707 cc += GET(cc, 1);
8709 compile_matchingpath(common, ccbegin, cc, backtrack);
8710 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8711 return NULL;
/* Code executed when the alternative matched: record the new position (and
   the capture, for capturing forms). */
8713 if (framesize < 0)
8715 if (framesize == no_frame)
8716 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8718 if (offset != 0)
8720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8723 if (common->capture_last_ptr != 0)
8724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8727 else
8729 if (opcode == OP_SBRAPOS)
8730 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8731 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8734 /* Even if the match is empty, we need to reset the control head. */
8735 if (needs_control_head)
8736 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
/* For the S* opcodes TMP1 holds the position at the start of this
   iteration; an unchanged STR_PTR leaves the loop via 'emptymatch'. */
8738 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8739 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8741 if (!zero)
8742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8744 else
8746 if (offset != 0)
8748 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8749 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8750 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8751 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
8752 if (common->capture_last_ptr != 0)
8753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
8754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8756 else
8758 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8759 OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
8760 if (opcode == OP_SBRAPOS)
8761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
8762 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
8765 /* Even if the match is empty, we need to reset the control head. */
8766 if (needs_control_head)
8767 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
8769 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
8770 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
8772 if (!zero)
8774 if (framesize < 0)
8775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
8776 else
8777 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8781 JUMPTO(SLJIT_JUMP, loop);
8782 flush_stubs(common);
/* Backtracking code of this alternative; afterwards STR_PTR is reloaded
   from the saved position before the next alternative is tried. */
8784 compile_backtrackingpath(common, backtrack->top);
8785 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8786 return NULL;
8787 set_jumps(backtrack->topbacktracks, LABEL());
8789 if (framesize < 0)
8791 if (offset != 0)
8792 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8793 else
8794 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8796 else
8798 if (offset != 0)
8800 /* Last alternative. */
8801 if (*cc == OP_KETRPOS)
8802 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8803 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
8805 else
8807 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8808 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
8812 if (*cc == OP_KETRPOS)
8813 break;
8814 ccbegin = cc + 1 + LINK_SIZE;
8817 /* We don't have to restore the control head in case of a failed match. */
/* When !zero, the flag slot still holding a non-zero value (no iteration
   succeeded) sends control to this backtrack's topbacktracks list. */
8819 backtrack->topbacktracks = NULL;
8820 if (!zero)
8822 if (framesize < 0)
8823 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
8824 else /* TMP2 is set to [private_data_ptr] above. */
8825 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
8828 /* None of them matched. */
8829 set_jumps(emptymatch, LABEL());
8830 count_match(common);
8831 return cc + 1 + LINK_SIZE;
8834 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
8836 int class_len;
8838 *opcode = *cc;
8839 *exact = 0;
8841 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
8843 cc++;
8844 *type = OP_CHAR;
8846 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
8848 cc++;
8849 *type = OP_CHARI;
8850 *opcode -= OP_STARI - OP_STAR;
8852 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
8854 cc++;
8855 *type = OP_NOT;
8856 *opcode -= OP_NOTSTAR - OP_STAR;
8858 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
8860 cc++;
8861 *type = OP_NOTI;
8862 *opcode -= OP_NOTSTARI - OP_STAR;
8864 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
8866 cc++;
8867 *opcode -= OP_TYPESTAR - OP_STAR;
8868 *type = OP_END;
8870 else
8872 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
8873 *type = *opcode;
8874 cc++;
8875 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
8876 *opcode = cc[class_len - 1];
8878 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
8880 *opcode -= OP_CRSTAR - OP_STAR;
8881 *end = cc + class_len;
8883 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
8885 *exact = 1;
8886 *opcode -= OP_PLUS - OP_STAR;
8889 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
8891 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
8892 *end = cc + class_len;
8894 if (*opcode == OP_POSPLUS)
8896 *exact = 1;
8897 *opcode = OP_POSSTAR;
8900 else
8902 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
8903 *max = GET2(cc, (class_len + IMM2_SIZE));
8904 *exact = GET2(cc, class_len);
8906 if (*max == 0)
8908 if (*opcode == OP_CRPOSRANGE)
8909 *opcode = OP_POSSTAR;
8910 else
8911 *opcode -= OP_CRRANGE - OP_STAR;
8913 else
8915 *max -= *exact;
8916 if (*max == 0)
8917 *opcode = OP_EXACT;
8918 else if (*max == 1)
8920 if (*opcode == OP_CRPOSRANGE)
8921 *opcode = OP_POSQUERY;
8922 else
8923 *opcode -= OP_CRRANGE - OP_QUERY;
8925 else
8927 if (*opcode == OP_CRPOSRANGE)
8928 *opcode = OP_POSUPTO;
8929 else
8930 *opcode -= OP_CRRANGE - OP_UPTO;
8933 *end = cc + class_len + 2 * IMM2_SIZE;
8935 return cc;
8938 switch(*opcode)
8940 case OP_EXACT:
8941 *exact = GET2(cc, 0);
8942 cc += IMM2_SIZE;
8943 break;
8945 case OP_PLUS:
8946 case OP_MINPLUS:
8947 *exact = 1;
8948 *opcode -= OP_PLUS - OP_STAR;
8949 break;
8951 case OP_POSPLUS:
8952 *exact = 1;
8953 *opcode = OP_POSSTAR;
8954 break;
8956 case OP_UPTO:
8957 case OP_MINUPTO:
8958 case OP_POSUPTO:
8959 *max = GET2(cc, 0);
8960 cc += IMM2_SIZE;
8961 break;
8964 if (*type == OP_END)
8966 *type = *cc;
8967 *end = next_opcode(common, cc);
8968 cc++;
8969 return cc;
8972 *end = cc + 1;
8973 #ifdef SUPPORT_UTF
8974 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
8975 #endif
8976 return cc;
/* Emits the matching path of a single-item iterator (a repeated character,
   character type or character class).

   common  compiler state
   cc      points to the iterator opcode
   parent  backtrack whose list receives the new char_iterator_backtrack

   The repeat is decoded by get_iterator_parameters(). Code for the
   mandatory 'exact' repetitions is emitted first, followed by the code
   selected by the normalized repeat opcode.

   Returns the 'end' address computed by get_iterator_parameters(); it is
   moved a further two units forward when the character following the
   iterator is consumed by the charpos path below. */
8979 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8981 DEFINE_COMPILER;
8982 backtrack_common *backtrack;
8983 pcre_uchar opcode;
8984 pcre_uchar type;
8985 sljit_u32 max = 0, exact;
8986 BOOL fast_fail;
8987 sljit_s32 fast_str_ptr;
8988 BOOL charpos_enabled;
8989 pcre_uchar charpos_char;
8990 unsigned int charpos_othercasebit;
8991 pcre_uchar *end;
8992 jump_list *no_match = NULL;
8993 jump_list *no_char1_match = NULL;
8994 struct sljit_jump *jump = NULL;
8995 struct sljit_label *label;
/* With a private data slot the iterator state lives in that slot (relative
   to SLJIT_SP); without one it lives in stack slots STACK(0) / STACK(1). */
8996 int private_data_ptr = PRIVATE_DATA(cc);
8997 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8998 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8999 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9000 int tmp_base, tmp_offset;
9002 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
/* fast_str_ptr is the private data value registered for cc + 1. It is
   cleared when no fast-fail range is configured, and the early check below
   is skipped when this iterator is the fast forward start item. */
9004 fast_str_ptr = PRIVATE_DATA(cc + 1);
9005 fast_fail = TRUE;
9007 SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
9009 if (cc == common->fast_forward_bc_ptr)
9010 fast_fail = FALSE;
9011 else if (common->fast_fail_start_ptr == 0)
9012 fast_str_ptr = 0;
9014 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
9015 || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
9017 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
/* The scratch counter / position is kept in TMP3, except for OP_EXTUNI
   where the POSSESSIVE0 slot is used instead. */
9019 if (type != OP_EXTUNI)
9021 tmp_base = TMP3;
9022 tmp_offset = 0;
9024 else
9026 tmp_base = SLJIT_MEM1(SLJIT_SP);
9027 tmp_offset = POSSESSIVE0;
9030 if (fast_fail && fast_str_ptr != 0)
9031 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
9033 /* Handle fixed part first. */
9034 if (exact > 1)
9036 SLJIT_ASSERT(fast_str_ptr == 0);
/* In the first branch the input length is verified once before the loop,
   so the per-character code is generated with FALSE as its last argument. */
9037 if (common->mode == JIT_COMPILE
9038 #ifdef SUPPORT_UTF
9039 && !common->utf
9040 #endif
9041 && type != OP_ANYNL && type != OP_EXTUNI)
9043 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
9044 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
9045 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9046 label = LABEL();
9047 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
9048 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9049 JUMPTO(SLJIT_NOT_ZERO, label);
9051 else
9053 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
9054 label = LABEL();
9055 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
9056 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9057 JUMPTO(SLJIT_NOT_ZERO, label);
9060 else if (exact == 1)
9061 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
/* Variable part, selected by the normalized repeat opcode. */
9063 switch(opcode)
9065 case OP_STAR:
9066 case OP_UPTO:
9067 SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
9069 if (type == OP_ANYNL || type == OP_EXTUNI)
/* For these types every reached position is pushed onto the stack. */
9071 SLJIT_ASSERT(private_data_ptr == 0);
9072 SLJIT_ASSERT(fast_str_ptr == 0);
9074 allocate_stack(common, 2);
9075 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9076 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9078 if (opcode == OP_UPTO)
9079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
9081 label = LABEL();
9082 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9083 if (opcode == OP_UPTO)
9085 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9086 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9087 jump = JUMP(SLJIT_ZERO);
9088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9091 /* We cannot use TMP3 because of this allocate_stack. */
9092 allocate_stack(common, 1);
9093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9094 JUMPTO(SLJIT_JUMP, label);
9095 if (jump != NULL)
9096 JUMPHERE(jump);
9098 else
9100 charpos_enabled = FALSE;
9101 charpos_char = 0;
9102 charpos_othercasebit = 0;
/* charpos: when the iterator (not itself a plain character repeat) is
   directly followed by OP_CHAR / OP_CHARI, that following character is
   searched for while the iterator advances. It is disabled for characters
   with extra units in UTF mode and for caseless characters whose other
   case bit is reported as 0. */
9104 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
9106 charpos_enabled = TRUE;
9107 #ifdef SUPPORT_UTF
9108 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
9109 #endif
9110 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
9112 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
9113 if (charpos_othercasebit == 0)
9114 charpos_enabled = FALSE;
9117 if (charpos_enabled)
9119 charpos_char = end[1];
9120 /* Consume the OP_CHAR opcode. */
9121 end += 2;
9122 #if defined COMPILE_PCRE8
9123 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
9124 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
9125 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
9126 if ((charpos_othercasebit & 0x100) != 0)
9127 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
9128 #endif
9129 if (charpos_othercasebit != 0)
9130 charpos_char |= charpos_othercasebit;
9132 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
9133 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
9134 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
9138 if (charpos_enabled)
9140 if (opcode == OP_UPTO)
9141 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
9143 /* Search the first instance of charpos_char. */
9144 jump = JUMP(SLJIT_JUMP);
9145 label = LABEL();
9146 if (opcode == OP_UPTO)
9148 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9149 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
9151 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
9152 if (fast_str_ptr != 0)
9153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9154 JUMPHERE(jump);
9156 detect_partial_match(common, &backtrack->topbacktracks);
9157 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
9158 if (charpos_othercasebit != 0)
9159 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
9160 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
9162 if (private_data_ptr == 0)
9163 allocate_stack(common, 2);
9164 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9165 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9166 if (opcode == OP_UPTO)
9168 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9169 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
9172 /* Search the last instance of charpos_char. */
9173 label = LABEL();
9174 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
9175 if (fast_str_ptr != 0)
9176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9177 detect_partial_match(common, &no_match);
9178 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
9179 if (charpos_othercasebit != 0)
9180 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
9181 if (opcode == OP_STAR)
9183 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
9184 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9186 else
9188 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
9189 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9190 JUMPHERE(jump);
9193 if (opcode == OP_UPTO)
9195 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9196 JUMPTO(SLJIT_NOT_ZERO, label);
9198 else
9199 JUMPTO(SLJIT_JUMP, label);
/* Continue right after the last recorded occurrence of charpos_char. */
9201 set_jumps(no_match, LABEL());
9202 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9203 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9204 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9206 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9207 else if (common->utf)
/* UTF mode: the position is saved after every matched character and
   restored from base/offset0 when the loop stops. */
9209 if (private_data_ptr == 0)
9210 allocate_stack(common, 2);
9212 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9213 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9215 if (opcode == OP_UPTO)
9216 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9218 label = LABEL();
9219 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9220 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9222 if (opcode == OP_UPTO)
9224 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9225 JUMPTO(SLJIT_NOT_ZERO, label);
9227 else
9228 JUMPTO(SLJIT_JUMP, label);
9230 set_jumps(no_match, LABEL());
9231 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9232 if (fast_str_ptr != 0)
9233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9235 #endif
9236 else
/* Remaining case: a failed character test steps STR_PTR back by one unit
   (no_char1_match); hitting the end of input does not (no_match). */
9238 if (private_data_ptr == 0)
9239 allocate_stack(common, 2);
9241 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
9242 if (opcode == OP_UPTO)
9243 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9245 label = LABEL();
9246 detect_partial_match(common, &no_match);
9247 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9248 if (opcode == OP_UPTO)
9250 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9251 JUMPTO(SLJIT_NOT_ZERO, label);
9252 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9254 else
9255 JUMPTO(SLJIT_JUMP, label);
9257 set_jumps(no_char1_match, LABEL());
9258 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9259 set_jumps(no_match, LABEL());
9260 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9261 if (fast_str_ptr != 0)
9262 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9265 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9266 break;
/* The MIN* forms only save the state here; no character is matched on
   this path. */
9268 case OP_MINSTAR:
9269 if (private_data_ptr == 0)
9270 allocate_stack(common, 1);
9271 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9272 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9273 if (fast_str_ptr != 0)
9274 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9275 break;
9277 case OP_MINUPTO:
9278 SLJIT_ASSERT(fast_str_ptr == 0);
9279 if (private_data_ptr == 0)
9280 allocate_stack(common, 2);
9281 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9282 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
9283 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9284 break;
9286 case OP_QUERY:
9287 case OP_MINQUERY:
9288 SLJIT_ASSERT(fast_str_ptr == 0);
9289 if (private_data_ptr == 0)
9290 allocate_stack(common, 1);
9291 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9292 if (opcode == OP_QUERY)
9293 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
9294 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
9295 break;
/* Everything was already emitted by the fixed part above. */
9297 case OP_EXACT:
9298 break;
/* The POS* forms save no state in base/offset0 and set no matchingpath
   label. */
9300 case OP_POSSTAR:
9301 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9302 if (common->utf)
9304 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9305 label = LABEL();
9306 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9307 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9308 JUMPTO(SLJIT_JUMP, label);
9309 set_jumps(no_match, LABEL());
9310 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9311 if (fast_str_ptr != 0)
9312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9313 break;
9315 #endif
9316 label = LABEL();
9317 detect_partial_match(common, &no_match);
9318 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9319 JUMPTO(SLJIT_JUMP, label);
9320 set_jumps(no_char1_match, LABEL());
9321 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9322 set_jumps(no_match, LABEL());
9323 if (fast_str_ptr != 0)
9324 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
9325 break;
9327 case OP_POSUPTO:
9328 SLJIT_ASSERT(fast_str_ptr == 0);
9329 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9330 if (common->utf)
9332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9333 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9334 label = LABEL();
9335 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9336 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
9337 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9338 JUMPTO(SLJIT_NOT_ZERO, label);
9339 set_jumps(no_match, LABEL());
9340 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9341 break;
9343 #endif
9344 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
9345 label = LABEL();
9346 detect_partial_match(common, &no_match);
9347 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
9348 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
9349 JUMPTO(SLJIT_NOT_ZERO, label);
/* Counter exhausted: pre-compensate the decrement that follows. */
9350 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9351 set_jumps(no_char1_match, LABEL());
9352 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9353 set_jumps(no_match, LABEL());
9354 break;
9356 case OP_POSQUERY:
9357 SLJIT_ASSERT(fast_str_ptr == 0);
9358 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9359 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
9360 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
9361 set_jumps(no_match, LABEL());
9362 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
9363 break;
9365 default:
9366 SLJIT_UNREACHABLE();
9367 break;
9370 count_match(common);
9371 return end;
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT
   (the three opcodes compile_matchingpath routes here).
   common: compiler state; cc: points at the opcode.
   Every visible return yields cc + 1.
   Accept jumps go straight to common->accept_label when it is already set,
   otherwise they are queued on the common->accept jump list. */
9374 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9376 DEFINE_COMPILER;
9377 backtrack_common *backtrack;
9379 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
/* OP_FAIL: unconditional jump queued on this entry's topbacktracks. */
9381 if (*cc == OP_FAIL)
9383 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9384 return cc + 1;
9387 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
9389 /* No need to check notempty conditions. */
9390 if (common->accept_label == NULL)
9391 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9392 else
9393 JUMPTO(SLJIT_JUMP, common->accept_label);
9394 return cc + 1;
/* Accept immediately when STR_PTR differs from the value stored at OVECTOR(0). */
9397 if (common->accept_label == NULL)
9398 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
9399 else
9400 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
/* STR_PTR == OVECTOR(0) from here on: a non-zero notempty flag in
   jit_arguments forces a backtrack. */
9401 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9402 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
9403 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* A zero notempty_atstart flag means the accept is allowed. */
9404 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
9405 if (common->accept_label == NULL)
9406 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9407 else
9408 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
/* notempty_atstart is set: accept only if STR_PTR differs from
   jit_arguments.str, otherwise fall through to the backtrack jump. */
9409 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9410 if (common->accept_label == NULL)
9411 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
9412 else
9413 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
9414 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
9415 return cc + 1;
/* Emits the matching path for OP_CLOSE.
   The group number n is read with GET2(cc, 1). STR_PTR is stored into
   OVECTOR(2n + 1); unless common->optimized_cbracket[n] is non-zero, the
   value held at OVECTOR_PRIV(n) is also copied into OVECTOR(2n).
   Returns cc + 1 + IMM2_SIZE in every case. */
9418 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
9420 DEFINE_COMPILER;
9421 int offset = GET2(cc, 1);
9422 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
9424 /* Data will be discarded anyway... */
9425 if (common->currententry != NULL)
9426 return cc + 1 + IMM2_SIZE;
/* Load the private copy into TMP1 before offset is doubled below. */
9428 if (!optimized_cbracket)
9429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
/* From here offset indexes the ovector pair (2n, 2n + 1). */
9430 offset <<= 1;
9431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9432 if (!optimized_cbracket)
9433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9434 return cc + 1 + IMM2_SIZE;
/* Emits the matching path for the control verbs that compile_matchingpath
   routes here: OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN,
   OP_THEN_ARG and OP_COMMIT.
   Returns the address after the opcode: cc + 1, extended by 2 + cc[1] for
   the *_ARG variants. */
9437 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
9439 DEFINE_COMPILER;
9440 backtrack_common *backtrack;
9441 pcre_uchar opcode = *cc;
9442 pcre_uchar *ccend = cc + 1;
9444 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
9445 ccend += 2 + cc[1];
9447 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
/* OP_SKIP: save the current STR_PTR in one newly allocated stack slot. */
9449 if (opcode == OP_SKIP)
9451 allocate_stack(common, 1);
9452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9453 return ccend;
/* OP_PRUNE_ARG / OP_THEN_ARG: store the address cc + 2 (presumably the start
   of the verb's name -- confirm against the opcode layout) both in the local
   mark_ptr slot and in jit_arguments.mark_ptr. */
9456 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
9458 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9459 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9461 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9464 return ccend;
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
   entries created in this file point their common.cc field at it. */
9467 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
/* Opens a "then trap" for the code range [cc, ccend).
   Pushes a then_trap_backtrack, remembers it in common->then_trap, and
   records the range start (as an offset from common->start) and the frame
   size reported by get_framesize().
   Stack layout reserved here, with size = 3 + max(framesize, 0):
     STACK(size - 1)  start offset of the trapped range
     STACK(size - 2)  type_then_trap tag
     STACK(size - 3)  previous value of the control head
   The remaining size - 3 slots, if any, are filled by init_frame(). */
9469 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9471 DEFINE_COMPILER;
9472 backtrack_common *backtrack;
9473 BOOL needs_control_head;
9474 int size;
9476 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9477 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
9478 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9479 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
9480 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
/* A negative framesize contributes no extra slots. */
9482 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9483 size = 3 + (size < 0 ? 0 : size);
/* Keep the old control head in TMP2, then make the control head point at the
   three-slot trap record (which sits above the frame slots when size > 3). */
9485 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9486 allocate_stack(common, size);
9487 if (size > 3)
9488 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
9489 else
9490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
9492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
9493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
/* Only a non-negative framesize needs an initialised frame. */
9495 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
9496 if (size >= 0)
9497 init_frame(common, cc, ccend, size - 1, 0, FALSE);
/* Compiles the matching path for the opcodes in [cc, ccend) by dispatching
   each opcode to its generator and advancing cc to the value it returns.
   Returns early when a generator yields NULL, and on the default branch.
   ccend must point at OP_END or at an opcode in OP_ALT..OP_KETRPOS (asserted).
   Failure jumps of simple items are queued on parent->top->nextbacktracks
   when parent already has a top entry, otherwise on parent->topbacktracks.
   When common->has_then is set and then_offsets[] is non-zero for cc, a then
   trap is opened before the loop and a closing then_trap_backtrack entry is
   pushed after it. */
9500 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
9502 DEFINE_COMPILER;
9503 backtrack_common *backtrack;
9504 BOOL has_then_trap = FALSE;
9505 then_trap_backtrack *save_then_trap = NULL;
9507 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
9509 if (common->has_then && common->then_offsets[cc - common->start] != 0)
9511 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
9512 has_then_trap = TRUE;
9513 save_then_trap = common->then_trap;
9514 /* Tail item on backtrack. */
9515 compile_then_trap_matchingpath(common, cc, ccend, parent);
9518 while (cc < ccend)
9520 switch(*cc)
/* Simple assertions. */
9522 case OP_SOD:
9523 case OP_SOM:
9524 case OP_NOT_WORD_BOUNDARY:
9525 case OP_WORD_BOUNDARY:
9526 case OP_EODN:
9527 case OP_EOD:
9528 case OP_DOLL:
9529 case OP_DOLLM:
9530 case OP_CIRC:
9531 case OP_CIRCM:
9532 case OP_REVERSE:
9533 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9534 break;
/* Opcodes handled by compile_char1_matchingpath without a repeat. */
9536 case OP_NOT_DIGIT:
9537 case OP_DIGIT:
9538 case OP_NOT_WHITESPACE:
9539 case OP_WHITESPACE:
9540 case OP_NOT_WORDCHAR:
9541 case OP_WORDCHAR:
9542 case OP_ANY:
9543 case OP_ALLANY:
9544 case OP_ANYBYTE:
9545 case OP_NOTPROP:
9546 case OP_PROP:
9547 case OP_ANYNL:
9548 case OP_NOT_HSPACE:
9549 case OP_HSPACE:
9550 case OP_NOT_VSPACE:
9551 case OP_VSPACE:
9552 case OP_EXTUNI:
9553 case OP_NOT:
9554 case OP_NOTI:
9555 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9556 break;
/* OP_SET_SOM: push the old OVECTOR(0) on the stack and overwrite it with
   STR_PTR. */
9558 case OP_SET_SOM:
9559 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9560 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
9561 allocate_stack(common, 1);
9562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
9563 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9564 cc++;
9565 break;
/* Literal characters: compile_charn_matchingpath is used only in
   JIT_COMPILE mode; any other mode goes through compile_char1_matchingpath. */
9567 case OP_CHAR:
9568 case OP_CHARI:
9569 if (common->mode == JIT_COMPILE)
9570 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9571 else
9572 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9573 break;
/* Repeated single items: all go to compile_iterator_matchingpath. */
9575 case OP_STAR:
9576 case OP_MINSTAR:
9577 case OP_PLUS:
9578 case OP_MINPLUS:
9579 case OP_QUERY:
9580 case OP_MINQUERY:
9581 case OP_UPTO:
9582 case OP_MINUPTO:
9583 case OP_EXACT:
9584 case OP_POSSTAR:
9585 case OP_POSPLUS:
9586 case OP_POSQUERY:
9587 case OP_POSUPTO:
9588 case OP_STARI:
9589 case OP_MINSTARI:
9590 case OP_PLUSI:
9591 case OP_MINPLUSI:
9592 case OP_QUERYI:
9593 case OP_MINQUERYI:
9594 case OP_UPTOI:
9595 case OP_MINUPTOI:
9596 case OP_EXACTI:
9597 case OP_POSSTARI:
9598 case OP_POSPLUSI:
9599 case OP_POSQUERYI:
9600 case OP_POSUPTOI:
9601 case OP_NOTSTAR:
9602 case OP_NOTMINSTAR:
9603 case OP_NOTPLUS:
9604 case OP_NOTMINPLUS:
9605 case OP_NOTQUERY:
9606 case OP_NOTMINQUERY:
9607 case OP_NOTUPTO:
9608 case OP_NOTMINUPTO:
9609 case OP_NOTEXACT:
9610 case OP_NOTPOSSTAR:
9611 case OP_NOTPOSPLUS:
9612 case OP_NOTPOSQUERY:
9613 case OP_NOTPOSUPTO:
9614 case OP_NOTSTARI:
9615 case OP_NOTMINSTARI:
9616 case OP_NOTPLUSI:
9617 case OP_NOTMINPLUSI:
9618 case OP_NOTQUERYI:
9619 case OP_NOTMINQUERYI:
9620 case OP_NOTUPTOI:
9621 case OP_NOTMINUPTOI:
9622 case OP_NOTEXACTI:
9623 case OP_NOTPOSSTARI:
9624 case OP_NOTPOSPLUSI:
9625 case OP_NOTPOSQUERYI:
9626 case OP_NOTPOSUPTOI:
9627 case OP_TYPESTAR:
9628 case OP_TYPEMINSTAR:
9629 case OP_TYPEPLUS:
9630 case OP_TYPEMINPLUS:
9631 case OP_TYPEQUERY:
9632 case OP_TYPEMINQUERY:
9633 case OP_TYPEUPTO:
9634 case OP_TYPEMINUPTO:
9635 case OP_TYPEEXACT:
9636 case OP_TYPEPOSSTAR:
9637 case OP_TYPEPOSPLUS:
9638 case OP_TYPEPOSQUERY:
9639 case OP_TYPEPOSUPTO:
9640 cc = compile_iterator_matchingpath(common, cc, parent);
9641 break;
/* Character classes: the code unit located 32 bytes after the opcode's first
   operand position decides between the iterator and the single-match path
   (an opcode in OP_CRSTAR..OP_CRPOSRANGE selects the iterator). */
9643 case OP_CLASS:
9644 case OP_NCLASS:
9645 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
9646 cc = compile_iterator_matchingpath(common, cc, parent);
9647 else
9648 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9649 break;
9651 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* OP_XCLASS: same decision, with the following opcode found via GET(cc, 1). */
9652 case OP_XCLASS:
9653 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
9654 cc = compile_iterator_matchingpath(common, cc, parent);
9655 else
9656 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
9657 break;
9658 #endif
/* Back references: repeated ones use the ref iterator, plain ones are
   compiled in place and cc is advanced past the operand here. */
9660 case OP_REF:
9661 case OP_REFI:
9662 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
9663 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9664 else
9666 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9667 cc += 1 + IMM2_SIZE;
9669 break;
/* OP_DNREF / OP_DNREFI carry two IMM2 operands; the non-repeated form runs
   compile_dnref_search before compile_ref_matchingpath. */
9671 case OP_DNREF:
9672 case OP_DNREFI:
9673 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
9674 cc = compile_ref_iterator_matchingpath(common, cc, parent);
9675 else
9677 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
9678 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
9679 cc += 1 + 2 * IMM2_SIZE;
9681 break;
9683 case OP_RECURSE:
9684 cc = compile_recurse_matchingpath(common, cc, parent);
9685 break;
9687 case OP_CALLOUT:
9688 cc = compile_callout_matchingpath(common, cc, parent);
9689 break;
/* Stand-alone assertions get an assert_backtrack entry of their own. */
9691 case OP_ASSERT:
9692 case OP_ASSERT_NOT:
9693 case OP_ASSERTBACK:
9694 case OP_ASSERTBACK_NOT:
9695 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9696 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9697 break;
/* OP_BRAMINZERO: cc jumps to the end of the following bracket. One slot
   holding STR_PTR is pushed, or two slots (0 and STR_PTR) when the bracket
   ends with OP_KETRMIN. The label after the pushes is stored as the entry's
   matchingpath. */
9699 case OP_BRAMINZERO:
9700 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
9701 cc = bracketend(cc + 1);
9702 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
9704 allocate_stack(common, 1);
9705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9707 else
9709 allocate_stack(common, 2);
9710 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
9713 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
9714 count_match(common);
9715 break;
9717 case OP_ONCE:
9718 case OP_ONCE_NC:
9719 case OP_BRA:
9720 case OP_CBRA:
9721 case OP_COND:
9722 case OP_SBRA:
9723 case OP_SCBRA:
9724 case OP_SCOND:
9725 cc = compile_bracket_matchingpath(common, cc, parent);
9726 break;
/* OP_BRAZERO: the next opcode decides whether it prefixes a bracket
   (> OP_ASSERTBACK_NOT) or an assertion. */
9728 case OP_BRAZERO:
9729 if (cc[1] > OP_ASSERTBACK_NOT)
9730 cc = compile_bracket_matchingpath(common, cc, parent);
9731 else
9733 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
9734 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
9736 break;
9738 case OP_BRAPOS:
9739 case OP_CBRAPOS:
9740 case OP_SBRAPOS:
9741 case OP_SCBRAPOS:
9742 case OP_BRAPOSZERO:
9743 cc = compile_bracketpos_matchingpath(common, cc, parent);
9744 break;
/* OP_MARK: the previous mark value is saved on the stack and cc + 2 becomes
   the new mark, both in the local slot and in jit_arguments.mark_ptr.
   With common->has_skip_arg, five slots are used and linked into the control
   head: STACK(0) previous control head, STACK(1) type_mark, STACK(2) cc + 2,
   STACK(3) STR_PTR, STACK(4) previous mark. */
9746 case OP_MARK:
9747 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
9748 SLJIT_ASSERT(common->mark_ptr != 0);
9749 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
9750 allocate_stack(common, common->has_skip_arg ? 5 : 1);
9751 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
9753 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
9754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
9755 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
9756 if (common->has_skip_arg)
9758 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
9760 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
9761 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
9762 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
9763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9765 cc += 1 + 2 + cc[1];
9766 break;
9768 case OP_PRUNE:
9769 case OP_PRUNE_ARG:
9770 case OP_SKIP:
9771 case OP_SKIP_ARG:
9772 case OP_THEN:
9773 case OP_THEN_ARG:
9774 case OP_COMMIT:
9775 cc = compile_control_verb_matchingpath(common, cc, parent);
9776 break;
9778 case OP_FAIL:
9779 case OP_ACCEPT:
9780 case OP_ASSERT_ACCEPT:
9781 cc = compile_fail_accept_matchingpath(common, cc, parent);
9782 break;
9784 case OP_CLOSE:
9785 cc = compile_close_matchingpath(common, cc);
9786 break;
/* OP_SKIPZERO: step over the following bracket without emitting any code. */
9788 case OP_SKIPZERO:
9789 cc = bracketend(cc + 1);
9790 break;
9792 default:
9793 SLJIT_UNREACHABLE();
9794 return;
/* A NULL result from a generator aborts the compilation of this range. */
9796 if (cc == NULL)
9797 return;
9800 if (has_then_trap)
9802 /* Head item on backtrack. */
9803 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
9804 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
9805 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
9806 common->then_trap = save_then_trap;
9808 SLJIT_ASSERT(cc == ccend);
/* The backtrack-pushing helper macros are not used past this point. */
9811 #undef PUSH_BACKTRACK
9812 #undef PUSH_BACKTRACK_NOVALUE
9813 #undef BACKTRACK_AS
/* COMPILE_BACKTRACKINGPATH(current): runs compile_backtrackingpath() on the
   given entry and returns from the enclosing (void) function if the sljit
   compiler reports an error. Expects `common` and `compiler` to be in scope. */
9815 #define COMPILE_BACKTRACKINGPATH(current) \
9816 do \
9818 compile_backtrackingpath(common, (current)); \
9819 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9820 return; \
9822 while (0)
/* CURRENT_AS(type): views the local variable `current` as a `type *`. */
9824 #define CURRENT_AS(type) ((type *)current)
/* Emits the backtracking path for an iterator whose parameters are decoded
   by get_iterator_parameters() from current->cc.
   The iterator's saved state is addressed through (base, offset0/offset1):
   two slots on the backtrack stack when PRIVATE_DATA(cc) is 0, otherwise the
   private data area at private_data_ptr relative to SLJIT_SP.
   After the opcode-specific code, current->topbacktracks is bound to the
   label that follows. */
9826 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9828 DEFINE_COMPILER;
9829 pcre_uchar *cc = current->cc;
9830 pcre_uchar opcode;
9831 pcre_uchar type;
9832 sljit_u32 max = 0, exact;
9833 struct sljit_label *label = NULL;
9834 struct sljit_jump *jump = NULL;
9835 jump_list *jumplist = NULL;
9836 pcre_uchar *end;
9837 int private_data_ptr = PRIVATE_DATA(cc);
9838 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
9839 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
9840 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
9842 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
9844 switch(opcode)
9846 case OP_STAR:
9847 case OP_UPTO:
/* OP_ANYNL / OP_EXTUNI: pop one saved STR_PTR from the stack and resume the
   matching path while the popped value is non-zero. */
9848 if (type == OP_ANYNL || type == OP_EXTUNI)
9850 SLJIT_ASSERT(private_data_ptr == 0);
9851 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9852 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9853 free_stack(common, 1);
9854 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9856 else
/* charpos variant: walk backwards from the saved position (offset0), bounded
   below by the value at offset1, and resume the matching path as soon as the
   code unit before STR_PTR (OR-ed with othercasebit if that is non-zero)
   equals u.charpos.chr. */
9858 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
9860 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9861 OP1(SLJIT_MOV, TMP2, 0, base, offset1);
9862 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9864 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
9865 label = LABEL();
9866 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
9867 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9868 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
9869 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
9870 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9871 skip_char_back(common);
9872 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
9874 else
/* Generic variant: unless the saved position has reached the value at
   offset1, step back one character, store the new position and retry. */
9876 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9877 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
9878 skip_char_back(common);
9879 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9880 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
/* Exhausted: release the two stack slots when the state lived on the stack. */
9882 JUMPHERE(jump);
9883 if (private_data_ptr == 0)
9884 free_stack(common, 2);
9886 break;
/* OP_MINSTAR: from the saved position try to match one more item; on success
   store the new position and resume, on failure release the slot. */
9888 case OP_MINSTAR:
9889 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9890 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9891 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9892 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9893 set_jumps(jumplist, LABEL());
9894 if (private_data_ptr == 0)
9895 free_stack(common, 1);
9896 break;
/* OP_MINUPTO: offset1 holds a counter that is decremented first; reaching
   zero, or failing to match one more item, ends the iterator. */
9898 case OP_MINUPTO:
9899 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
9900 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9901 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9902 add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
9904 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
9905 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9906 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
9907 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9909 set_jumps(jumplist, LABEL());
9910 if (private_data_ptr == 0)
9911 free_stack(common, 2);
9912 break;
/* OP_QUERY: reload the saved position and clear the slot; a non-zero value
   resumes the matching path. Jumps collected in u.backtracks land on a second
   copy of the reload-and-clear sequence that resumes unconditionally. */
9914 case OP_QUERY:
9915 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9916 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9917 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9918 jump = JUMP(SLJIT_JUMP);
9919 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
9920 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9921 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9922 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9923 JUMPHERE(jump);
9924 if (private_data_ptr == 0)
9925 free_stack(common, 1);
9926 break;
/* OP_MINQUERY: reload and clear the slot; a zero value means nothing is left
   to try, otherwise attempt to match the single optional item now. */
9928 case OP_MINQUERY:
9929 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
9930 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
9931 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9932 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
9933 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
9934 set_jumps(jumplist, LABEL());
9935 JUMPHERE(jump);
9936 if (private_data_ptr == 0)
9937 free_stack(common, 1);
9938 break;
/* These opcodes emit no backtracking code of their own. */
9940 case OP_EXACT:
9941 case OP_POSSTAR:
9942 case OP_POSQUERY:
9943 case OP_POSUPTO:
9944 break;
9946 default:
9947 SLJIT_UNREACHABLE();
9948 break;
9951 set_jumps(current->topbacktracks, LABEL());
/* Emits the backtracking path for a repeated back reference.
   The repeat opcode is read from the code unit after the reference operand:
   one IMM2 for OP_REF / OP_REFI, two IMM2 for any other opcode reaching this
   function. An even repeat opcode selects the "maximize" branch. */
9954 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9956 DEFINE_COMPILER;
9957 pcre_uchar *cc = current->cc;
9958 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9959 pcre_uchar type;
9961 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
9963 if ((type & 0x1) == 0)
9965 /* Maximize case. */
/* Pop one saved STR_PTR; a non-zero value resumes the matching path. */
9966 set_jumps(current->topbacktracks, LABEL());
9967 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9968 free_stack(common, 1);
9969 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9970 return;
/* Other case: STACK(0) is read without being popped; a non-zero value resumes
   the matching path. Otherwise the record is released: 2 slots for
   OP_REF / OP_REFI, 3 slots for the other opcodes. */
9973 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9974 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
9975 set_jumps(current->topbacktracks, LABEL());
9976 free_stack(common, ref ? 2 : 3);
/* Emits the backtracking path for a recursion entry.
   For an inlined pattern only the nested backtracking path (current->top) is
   compiled and topbacktracks is bound; nothing else is emitted.
   Otherwise the values saved on the stack are restored:
     has_set_som and mark_ptr both in use: STACK(0) -> OVECTOR(0),
                                           STACK(1) -> mark slot (2 slots freed)
     only one of them in use:              STACK(0) -> that slot (1 slot freed) */
9979 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9981 DEFINE_COMPILER;
9983 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9984 compile_backtrackingpath(common, current->top);
9985 set_jumps(current->topbacktracks, LABEL());
9986 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
9987 return;
9989 if (common->has_set_som && common->mark_ptr != 0)
9991 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9992 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9993 free_stack(common, 2);
9994 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
9995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9997 else if (common->has_set_som || common->mark_ptr != 0)
9999 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10000 free_stack(common, 1);
10001 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
/* Emits the backtracking path for an assertion entry (assert_backtrack),
   optionally prefixed by OP_BRAZERO; OP_BRAMINZERO is not expected here
   (asserted).
   With an OP_BRAZERO prefix, STACK(0) is loaded into STR_PTR first; a non-zero
   value leads back to the entry's matchingpath after the slot has been
   zeroed. */
10005 static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10007 DEFINE_COMPILER;
10008 pcre_uchar *cc = current->cc;
10009 pcre_uchar bra = OP_BRA;
10010 struct sljit_jump *brajump = NULL;
10012 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
10013 if (*cc == OP_BRAZERO)
10015 bra = *cc;
10016 cc++;
10019 if (bra == OP_BRAZERO)
10021 SLJIT_ASSERT(current->topbacktracks == NULL);
10022 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Negative framesize: there is no frame to revert, so only the pending jumps
   are bound and the optional OP_BRAZERO slot is handled. */
10025 if (CURRENT_AS(assert_backtrack)->framesize < 0)
10027 set_jumps(current->topbacktracks, LABEL());
10029 if (bra == OP_BRAZERO)
10031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10032 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
10033 free_stack(common, 1);
10035 return;
10038 if (bra == OP_BRAZERO)
/* Negative assertions with OP_BRAZERO need no frame revert either. */
10040 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
10042 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10043 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
10044 free_stack(common, 1);
10045 return;
10047 free_stack(common, 1);
10048 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
/* Positive assertions: reset STACK_TOP from the assertion's private slot,
   call the shared revertframes routine, then reload the private slot from
   the stack entry at STACK(-framesize - 1). */
10051 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
10053 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
10054 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10055 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(assert_backtrack)->framesize - 1));
10057 set_jumps(current->topbacktracks, LABEL());
10059 else
10060 set_jumps(current->topbacktracks, LABEL());
/* OP_BRAZERO: re-reserve the slot freed above by adjusting STACK_TOP
   directly, store 0 in it and retry the matching path; brajump (taken when
   STR_PTR was 0) lands after this sequence. */
10062 if (bra == OP_BRAZERO)
10064 /* We know there is enough place on the stack. */
10065 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10066 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10067 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
10068 JUMPHERE(brajump);
10072 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10074 DEFINE_COMPILER;
10075 int opcode, stacksize, alt_count, alt_max;
10076 int offset = 0;
10077 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
10078 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
10079 pcre_uchar *cc = current->cc;
10080 pcre_uchar *ccbegin;
10081 pcre_uchar *ccprev;
10082 pcre_uchar bra = OP_BRA;
10083 pcre_uchar ket;
10084 assert_backtrack *assert;
10085 sljit_uw *next_update_addr = NULL;
10086 BOOL has_alternatives;
10087 BOOL needs_control_head = FALSE;
10088 struct sljit_jump *brazero = NULL;
10089 struct sljit_jump *alt1 = NULL;
10090 struct sljit_jump *alt2 = NULL;
10091 struct sljit_jump *once = NULL;
10092 struct sljit_jump *cond = NULL;
10093 struct sljit_label *rmin_label = NULL;
10094 struct sljit_label *exact_label = NULL;
10096 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10098 bra = *cc;
10099 cc++;
10102 opcode = *cc;
10103 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
10104 ket = *ccbegin;
10105 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
10107 repeat_ptr = PRIVATE_DATA(ccbegin);
10108 repeat_type = PRIVATE_DATA(ccbegin + 2);
10109 repeat_count = PRIVATE_DATA(ccbegin + 3);
10110 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
10111 if (repeat_type == OP_UPTO)
10112 ket = OP_KETRMAX;
10113 if (repeat_type == OP_MINUPTO)
10114 ket = OP_KETRMIN;
10116 ccbegin = cc;
10117 cc += GET(cc, 1);
10118 has_alternatives = *cc == OP_ALT;
10119 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
10120 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
10121 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10122 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
10123 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10124 opcode = OP_SCOND;
10125 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
10126 opcode = OP_ONCE;
10128 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
10130 /* Decoding the needs_control_head in framesize. */
10131 if (opcode == OP_ONCE)
10133 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
10134 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
10137 if (ket != OP_KET && repeat_type != 0)
10139 /* TMP1 is used in OP_KETRMIN below. */
10140 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10141 free_stack(common, 1);
10142 if (repeat_type == OP_UPTO)
10143 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
10144 else
10145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
10148 if (ket == OP_KETRMAX)
10150 if (bra == OP_BRAZERO)
10152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10153 free_stack(common, 1);
10154 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
10157 else if (ket == OP_KETRMIN)
10159 if (bra != OP_BRAMINZERO)
10161 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10162 if (repeat_type != 0)
10164 /* TMP1 was set a few lines above. */
10165 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10166 /* Drop STR_PTR for non-greedy plus quantifier. */
10167 if (opcode != OP_ONCE)
10168 free_stack(common, 1);
10170 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
10172 /* Checking zero-length iteration. */
10173 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
10174 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10175 else
10177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10178 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10180 /* Drop STR_PTR for non-greedy plus quantifier. */
10181 if (opcode != OP_ONCE)
10182 free_stack(common, 1);
10184 else
10185 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10187 rmin_label = LABEL();
10188 if (repeat_type != 0)
10189 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10191 else if (bra == OP_BRAZERO)
10193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10194 free_stack(common, 1);
10195 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
10197 else if (repeat_type == OP_EXACT)
10199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10200 exact_label = LABEL();
10203 if (offset != 0)
10205 if (common->capture_last_ptr != 0)
10207 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
10208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10209 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
10211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10212 free_stack(common, 3);
10213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
10214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
10216 else if (common->optimized_cbracket[offset >> 1] == 0)
10218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10219 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10220 free_stack(common, 2);
10221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10226 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
10228 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10230 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10231 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10233 once = JUMP(SLJIT_JUMP);
10235 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
10237 if (has_alternatives)
10239 /* Always exactly one alternative. */
10240 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10241 free_stack(common, 1);
10243 alt_max = 2;
10244 alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
10247 else if (has_alternatives)
10249 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10250 free_stack(common, 1);
10252 if (alt_max > 4)
10254 /* Table jump if alt_max is greater than 4. */
10255 next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
10256 if (SLJIT_UNLIKELY(next_update_addr == NULL))
10257 return;
10258 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
10259 add_label_addr(common, next_update_addr++);
10261 else
10263 if (alt_max == 4)
10264 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
10265 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
10269 COMPILE_BACKTRACKINGPATH(current->top);
10270 if (current->topbacktracks)
10271 set_jumps(current->topbacktracks, LABEL());
10273 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
10275 /* Conditional block always has at most one alternative. */
10276 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
10278 SLJIT_ASSERT(has_alternatives);
10279 assert = CURRENT_AS(bracket_backtrack)->u.assert;
10280 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
10282 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
10283 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10284 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
10286 cond = JUMP(SLJIT_JUMP);
10287 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
10289 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
10291 SLJIT_ASSERT(has_alternatives);
10292 cond = JUMP(SLJIT_JUMP);
10293 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
10295 else
10296 SLJIT_ASSERT(!has_alternatives);
10299 if (has_alternatives)
10301 alt_count = sizeof(sljit_uw);
10304 current->top = NULL;
10305 current->topbacktracks = NULL;
10306 current->nextbacktracks = NULL;
10307 /* Conditional blocks always have an additional alternative, even if it is empty. */
10308 if (*cc == OP_ALT)
10310 ccprev = cc + 1 + LINK_SIZE;
10311 cc += GET(cc, 1);
10312 if (opcode != OP_COND && opcode != OP_SCOND)
10314 if (opcode != OP_ONCE)
10316 if (private_data_ptr != 0)
10317 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10318 else
10319 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10321 else
10322 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
10324 compile_matchingpath(common, ccprev, cc, current);
10325 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10326 return;
10329 /* Instructions after the current alternative is successfully matched. */
10330 /* There is a similar code in compile_bracket_matchingpath. */
10331 if (opcode == OP_ONCE)
10332 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10334 stacksize = 0;
10335 if (repeat_type == OP_MINUPTO)
10337 /* We need to preserve the counter. TMP2 will be used below. */
10338 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10339 stacksize++;
10341 if (ket != OP_KET || bra != OP_BRA)
10342 stacksize++;
10343 if (offset != 0)
10345 if (common->capture_last_ptr != 0)
10346 stacksize++;
10347 if (common->optimized_cbracket[offset >> 1] == 0)
10348 stacksize += 2;
10350 if (opcode != OP_ONCE)
10351 stacksize++;
10353 if (stacksize > 0)
10354 allocate_stack(common, stacksize);
10356 stacksize = 0;
10357 if (repeat_type == OP_MINUPTO)
10359 /* TMP2 was set above. */
10360 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10361 stacksize++;
10364 if (ket != OP_KET || bra != OP_BRA)
10366 if (ket != OP_KET)
10367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10368 else
10369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10370 stacksize++;
10373 if (offset != 0)
10374 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10376 if (opcode != OP_ONCE)
10377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
10379 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
10381 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
10382 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10386 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
10388 if (opcode != OP_ONCE)
10390 if (alt_max > 4)
10391 add_label_addr(common, next_update_addr++);
10392 else
10394 if (alt_count != 2 * sizeof(sljit_uw))
10396 JUMPHERE(alt1);
10397 if (alt_max == 3 && alt_count == sizeof(sljit_uw))
10398 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
10400 else
10402 JUMPHERE(alt2);
10403 if (alt_max == 4)
10404 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
10407 alt_count += sizeof(sljit_uw);
10410 COMPILE_BACKTRACKINGPATH(current->top);
10411 if (current->topbacktracks)
10412 set_jumps(current->topbacktracks, LABEL());
10413 SLJIT_ASSERT(!current->nextbacktracks);
10415 while (*cc == OP_ALT);
10417 if (cond != NULL)
10419 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
10420 assert = CURRENT_AS(bracket_backtrack)->u.assert;
10421 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
10423 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
10424 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10425 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
10427 JUMPHERE(cond);
10430 /* Free the STR_PTR. */
10431 if (private_data_ptr == 0)
10432 free_stack(common, 1);
10435 if (offset != 0)
10437 /* Using both tmp register is better for instruction scheduling. */
10438 if (common->optimized_cbracket[offset >> 1] != 0)
10440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10441 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10442 free_stack(common, 2);
10443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10446 else
10448 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10449 free_stack(common, 1);
10450 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10453 else if (opcode == OP_SBRA || opcode == OP_SCOND)
10455 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
10456 free_stack(common, 1);
10458 else if (opcode == OP_ONCE)
10460 cc = ccbegin + GET(ccbegin, 1);
10461 stacksize = needs_control_head ? 1 : 0;
10463 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10465 /* Reset head and drop saved frame. */
10466 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
10468 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
10470 /* The STR_PTR must be released. */
10471 stacksize++;
10474 if (stacksize > 0)
10475 free_stack(common, stacksize);
10477 JUMPHERE(once);
10478 /* Restore previous private_data_ptr */
10479 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
10480 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
10481 else if (ket == OP_KETRMIN)
10483 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10484 /* See the comment below. */
10485 free_stack(common, 2);
10486 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10490 if (repeat_type == OP_EXACT)
10492 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
10494 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
10496 else if (ket == OP_KETRMAX)
10498 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10499 if (bra != OP_BRAZERO)
10500 free_stack(common, 1);
10502 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
10503 if (bra == OP_BRAZERO)
10505 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10506 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
10507 JUMPHERE(brazero);
10508 free_stack(common, 1);
10511 else if (ket == OP_KETRMIN)
10513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10515 /* OP_ONCE removes everything in case of a backtrack, so we don't
10516 need to explicitly release the STR_PTR. The extra release would
10517 badly affect the free_stack(2) above. */
10518 if (opcode != OP_ONCE)
10519 free_stack(common, 1);
10520 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
10521 if (opcode == OP_ONCE)
10522 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
10523 else if (bra == OP_BRAMINZERO)
10524 free_stack(common, 1);
10526 else if (bra == OP_BRAZERO)
10528 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10529 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
10530 JUMPHERE(brazero);
/* Emits the backtracking code of a possessive bracket. compile_backtrackingpath
   dispatches here for OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS and
   OP_BRAPOSZERO.

   common  - compiler state (provides the sljit compiler through DEFINE_COMPILER).
   current - backtrack record of the bracket; accessed as a bracketpos_backtrack.

   NOTE(review): this listing has lost its brace-only lines, so the nesting
   described in the comments below is inferred from the gaps in the embedded
   line numbers - confirm against the pristine file. */
10534 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10536 DEFINE_COMPILER;
10537 int offset;
10538 struct sljit_jump *jump;
/* Negative framesize: presumably no frame was saved for this bracket, so only
   the values stored directly on the backtrack stack need to be restored. */
10540 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
/* Capturing variants: put the two saved values back into the ovector pair. */
10542 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
10544 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
10545 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10546 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
/* TMP1 has already been written out above, so it can be reused for the third
   saved slot, which is restored into capture_last_ptr. */
10548 if (common->capture_last_ptr != 0)
10549 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
10550 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
10551 if (common->capture_last_ptr != 0)
10552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
/* Pending backtracks of the bracket land here; release its stack slots. */
10554 set_jumps(current->topbacktracks, LABEL());
10555 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10556 return;
/* Non-negative framesize: reload STACK_TOP from the bracket's private data
   slot and call the shared revertframes routine. */
10559 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
10560 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
/* The free_stack below is only reached through topbacktracks; the straight
   line path jumps over it. */
10562 if (current->topbacktracks)
10564 jump = JUMP(SLJIT_JUMP);
10565 set_jumps(current->topbacktracks, LABEL());
10566 /* Drop the stack frame. */
10567 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
10568 JUMPHERE(jump);
/* Restore the private data slot from the stack entry at -framesize - 1. */
10570 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
/* Emits the backtracking code for OP_BRAMINZERO (called from
   compile_backtrackingpath). The group that follows the opcode is compiled
   from here, by calling the matching-path generators manually.

   common  - compiler state.
   current - backtrack record of the OP_BRAMINZERO item; its top,
             topbacktracks and nextbacktracks fields are cleared first. */
10573 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10575 assert_backtrack backtrack;
10577 current->top = NULL;
10578 current->topbacktracks = NULL;
10579 current->nextbacktracks = NULL;
/* current->cc[1] is the opcode following OP_BRAMINZERO. Values above
   OP_ASSERTBACK_NOT are treated as brackets, the rest as assertions
   (relies on the opcode numbering, which is defined outside this view). */
10580 if (current->cc[1] > OP_ASSERTBACK_NOT)
10582 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10583 compile_bracket_matchingpath(common, current->cc, current);
10584 compile_bracket_backtrackingpath(common, current->top);
10586 else
/* Assertion case: a zeroed, stack-local assert_backtrack carries only the
   code pointer and the matching path label taken from the braminzero record. */
10588 memset(&backtrack, 0, sizeof(backtrack));
10589 backtrack.common.cc = current->cc;
10590 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
10591 /* Manual call of compile_assert_matchingpath. */
10592 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
/* Nothing may be left pending on the record once the group has been compiled. */
10594 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
/* Emits the backtracking code of a control verb. compile_backtrackingpath
   dispatches here for OP_THEN, OP_THEN_ARG, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP
   and OP_SKIP_ARG.

   common  - compiler state (then_trap, positive_assert, local_exit, quit and
             reset_match lists are read or extended here).
   current - backtrack record of the verb; *current->cc is its opcode.

   NOTE(review): brace-only lines are missing from this listing; nesting is
   inferred from the embedded line numbers. */
10597 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10599 DEFINE_COMPILER;
10600 pcre_uchar opcode = *current->cc;
10601 struct sljit_label *loop;
10602 struct sljit_jump *jump;
10604 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
/* An enclosing then trap exists: search the chain that starts at the control
   head for the entry belonging to that trap. */
10606 if (common->then_trap != NULL)
10608 SLJIT_ASSERT(common->control_head_ptr != 0);
10610 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10611 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
10612 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
/* Jump into the middle of the loop so the head entry itself is tested before
   any link is followed. */
10613 jump = JUMP(SLJIT_JUMP);
/* Loop: slot 0 of an entry links to the next entry; keep following it until
   slot 1 equals type_then_trap and slot 2 equals the start of this trap. */
10615 loop = LABEL();
10616 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10617 JUMPHERE(jump);
10618 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
10619 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
/* Entry found: continue at the quit list of the trap. */
10620 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
10621 return;
/* No trap, but a positive assertion is being compiled: leave through its
   quit list. */
10623 else if (common->positive_assert)
10625 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
10626 return;
/* With local_exit set, every remaining verb simply jumps to quit: directly
   when the label is already known, otherwise via the pending quit list. */
10630 if (common->local_exit)
10632 if (common->quit_label == NULL)
10633 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10634 else
10635 JUMPTO(SLJIT_JUMP, common->quit_label);
10636 return;
/* OP_SKIP_ARG: call do_search_mark at run time. STACK_TOP is parked in LOCALS0
   around the call because the register is loaded with current->cc + 2
   (presumably the mark name, passed as the second argument - the register to
   argument mapping is defined outside this view; TODO confirm). */
10639 if (opcode == OP_SKIP_ARG)
10641 SLJIT_ASSERT(common->control_head_ptr != 0);
10642 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
10644 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
10645 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
10646 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* The value left in TMP1 becomes the new STR_PTR; only a non-zero value
   leads to reset_match. */
10648 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10649 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
10650 return;
/* OP_SKIP reloads STR_PTR from the top stack slot; every other verb that
   reaches this point passes 0. Both continue at reset_match. */
10653 if (opcode == OP_SKIP)
10654 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10655 else
10656 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
10657 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
/* Emits the backtracking code of the virtual OP_THEN_TRAP item (called from
   compile_backtrackingpath).

   common  - compiler state; common->then_trap may be replaced here.
   current - backtrack record, accessed as a then_trap_backtrack.

   Two different kinds of record are handled: one that only carries a
   then_trap pointer to reinstate, and one that owns stack slots which must be
   released on both the normal and the THEN-triggered path. */
10660 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10662 DEFINE_COMPILER;
10663 struct sljit_jump *jump;
10664 int size;
/* Compile-time only: reinstate the stored trap and emit no code. */
10666 if (CURRENT_AS(then_trap_backtrack)->then_trap)
10668 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
10669 return;
/* The trap occupies three slots plus the frame, when framesize is not negative. */
10672 size = CURRENT_AS(then_trap_backtrack)->framesize;
10673 size = 3 + (size < 0 ? 0 : size);
/* Normal backtrack: fetch the first of the three trap slots (it lies above
   the frame part), release everything and skip the THEN entry below. */
10675 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
10676 free_stack(common, size);
10677 jump = JUMP(SLJIT_JUMP);
/* Entry point for jumps collected in the trap's quit list. */
10679 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
10680 /* STACK_TOP is set by THEN. */
10681 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
10682 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10683 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10684 free_stack(common, 3);
/* Both paths join here with the saved value in TMP1, which is written back
   to the control head slot. */
10686 JUMPHERE(jump);
10687 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
/* Emits the backtracking code for a chain of backtrack records.

   common  - compiler state; common->then_trap is saved on entry and restored
             on exit, so changes made while processing the chain do not leak
             to the caller.
   current - first record to process; the chain is followed through ->prev
             until it ends.

   For every record the pending nextbacktracks jumps are bound to the current
   position, then the opcode at current->cc selects the generator. An opcode
   without a case is treated as unreachable. */
10690 static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
10692 DEFINE_COMPILER;
10693 then_trap_backtrack *save_then_trap = common->then_trap;
10695 while (current)
10697 if (current->nextbacktracks != NULL)
10698 set_jumps(current->nextbacktracks, LABEL());
10699 switch(*current->cc)
/* OP_SET_SOM: pop one saved value and store it into OVECTOR(0). */
10701 case OP_SET_SOM:
10702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10703 free_stack(common, 1);
10704 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
10705 break;
/* Single character, negated character, character type and class repeats all
   share one generator. */
10707 case OP_STAR:
10708 case OP_MINSTAR:
10709 case OP_PLUS:
10710 case OP_MINPLUS:
10711 case OP_QUERY:
10712 case OP_MINQUERY:
10713 case OP_UPTO:
10714 case OP_MINUPTO:
10715 case OP_EXACT:
10716 case OP_POSSTAR:
10717 case OP_POSPLUS:
10718 case OP_POSQUERY:
10719 case OP_POSUPTO:
10720 case OP_STARI:
10721 case OP_MINSTARI:
10722 case OP_PLUSI:
10723 case OP_MINPLUSI:
10724 case OP_QUERYI:
10725 case OP_MINQUERYI:
10726 case OP_UPTOI:
10727 case OP_MINUPTOI:
10728 case OP_EXACTI:
10729 case OP_POSSTARI:
10730 case OP_POSPLUSI:
10731 case OP_POSQUERYI:
10732 case OP_POSUPTOI:
10733 case OP_NOTSTAR:
10734 case OP_NOTMINSTAR:
10735 case OP_NOTPLUS:
10736 case OP_NOTMINPLUS:
10737 case OP_NOTQUERY:
10738 case OP_NOTMINQUERY:
10739 case OP_NOTUPTO:
10740 case OP_NOTMINUPTO:
10741 case OP_NOTEXACT:
10742 case OP_NOTPOSSTAR:
10743 case OP_NOTPOSPLUS:
10744 case OP_NOTPOSQUERY:
10745 case OP_NOTPOSUPTO:
10746 case OP_NOTSTARI:
10747 case OP_NOTMINSTARI:
10748 case OP_NOTPLUSI:
10749 case OP_NOTMINPLUSI:
10750 case OP_NOTQUERYI:
10751 case OP_NOTMINQUERYI:
10752 case OP_NOTUPTOI:
10753 case OP_NOTMINUPTOI:
10754 case OP_NOTEXACTI:
10755 case OP_NOTPOSSTARI:
10756 case OP_NOTPOSPLUSI:
10757 case OP_NOTPOSQUERYI:
10758 case OP_NOTPOSUPTOI:
10759 case OP_TYPESTAR:
10760 case OP_TYPEMINSTAR:
10761 case OP_TYPEPLUS:
10762 case OP_TYPEMINPLUS:
10763 case OP_TYPEQUERY:
10764 case OP_TYPEMINQUERY:
10765 case OP_TYPEUPTO:
10766 case OP_TYPEMINUPTO:
10767 case OP_TYPEEXACT:
10768 case OP_TYPEPOSSTAR:
10769 case OP_TYPEPOSPLUS:
10770 case OP_TYPEPOSQUERY:
10771 case OP_TYPEPOSUPTO:
10772 case OP_CLASS:
10773 case OP_NCLASS:
/* OP_XCLASS is only handled in builds with UTF support or a non 8-bit library. */
10774 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
10775 case OP_XCLASS:
10776 #endif
10777 compile_iterator_backtrackingpath(common, current);
10778 break;
/* Back references, by number or by duplicated name. */
10780 case OP_REF:
10781 case OP_REFI:
10782 case OP_DNREF:
10783 case OP_DNREFI:
10784 compile_ref_iterator_backtrackingpath(common, current);
10785 break;
10787 case OP_RECURSE:
10788 compile_recurse_backtrackingpath(common, current);
10789 break;
10791 case OP_ASSERT:
10792 case OP_ASSERT_NOT:
10793 case OP_ASSERTBACK:
10794 case OP_ASSERTBACK_NOT:
10795 compile_assert_backtrackingpath(common, current);
10796 break;
10798 case OP_ONCE:
10799 case OP_ONCE_NC:
10800 case OP_BRA:
10801 case OP_CBRA:
10802 case OP_COND:
10803 case OP_SBRA:
10804 case OP_SCBRA:
10805 case OP_SCOND:
10806 compile_bracket_backtrackingpath(common, current);
10807 break;
/* OP_BRAZERO prefixes either a bracket or an assertion; the opcode that
   follows it decides (values above OP_ASSERTBACK_NOT are brackets). */
10809 case OP_BRAZERO:
10810 if (current->cc[1] > OP_ASSERTBACK_NOT)
10811 compile_bracket_backtrackingpath(common, current);
10812 else
10813 compile_assert_backtrackingpath(common, current);
10814 break;
10816 case OP_BRAPOS:
10817 case OP_CBRAPOS:
10818 case OP_SBRAPOS:
10819 case OP_SCBRAPOS:
10820 case OP_BRAPOSZERO:
10821 compile_bracketpos_backtrackingpath(common, current);
10822 break;
10824 case OP_BRAMINZERO:
10825 compile_braminzero_backtrackingpath(common, current);
10826 break;
/* OP_MARK: restore mark_ptr from the stack. With has_skip_arg set, five slots
   are released instead of one: the mark value is read from slot 4 and slot 0
   is written back to the control head. */
10828 case OP_MARK:
10829 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
10830 if (common->has_skip_arg)
10831 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10832 free_stack(common, common->has_skip_arg ? 5 : 1);
10833 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
10834 if (common->has_skip_arg)
10835 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10836 break;
10838 case OP_THEN:
10839 case OP_THEN_ARG:
10840 case OP_PRUNE:
10841 case OP_PRUNE_ARG:
10842 case OP_SKIP:
10843 case OP_SKIP_ARG:
10844 compile_control_verb_backtrackingpath(common, current);
10845 break;
/* OP_COMMIT: backtracking onto it jumps to quit. The return register is set
   to PCRE_ERROR_NOMATCH first, unless local_exit is set. */
10847 case OP_COMMIT:
10848 if (!common->local_exit)
10849 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10850 if (common->quit_label == NULL)
10851 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
10852 else
10853 JUMPTO(SLJIT_JUMP, common->quit_label);
10854 break;
/* These items emit no code of their own; their pending jumps are simply
   bound to the current position. */
10856 case OP_CALLOUT:
10857 case OP_FAIL:
10858 case OP_ACCEPT:
10859 case OP_ASSERT_ACCEPT:
10860 set_jumps(current->topbacktracks, LABEL());
10861 break;
10863 case OP_THEN_TRAP:
10864 /* A virtual opcode for then traps. */
10865 compile_then_trap_backtrackingpath(common, current);
10866 break;
10868 default:
10869 SLJIT_UNREACHABLE();
10870 break;
/* Advance to the previous record of the chain. */
10872 current = current->prev;
10874 common->then_trap = save_then_trap;
/* Emits the body of the recursion entry described by common->currententry.
   The body is entered with sljit_emit_fast_enter and left with
   sljit_emit_fast_return.

   common - compiler state. currententry->entry is set to the label of the
            generated body and all pending calls of the entry are bound to it.
            quit, quit_label, accept, accept_label and then_trap are reset for
            the duration of the body.

   The generated code leaves 1 in TMP1 when one of the alternatives matched
   and 0 otherwise. If the sljit compiler reports an error the function
   returns early, leaving the body unfinished.

   NOTE(review): brace-only lines are missing from this listing; nesting is
   inferred from the embedded line numbers. */
10877 static SLJIT_INLINE void compile_recurse(compiler_common *common)
10879 DEFINE_COMPILER;
10880 pcre_uchar *cc = common->start + common->currententry->start;
/* Capturing brackets carry an extra IMM2_SIZE field after the link; plain
   OP_BRA does not. */
10881 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
10882 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
10883 BOOL needs_control_head;
10884 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
10885 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
10886 int alternativesize;
10887 BOOL needs_frame;
10888 backtrack_common altbacktrack;
10889 struct sljit_jump *jump;
10891 /* Recurse captures then: the body starts with no active then trap. */
10892 common->then_trap = NULL;
10894 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means that no frame is needed; it is then counted as
   zero slots in the size computations below. */
10895 needs_frame = framesize >= 0;
10896 if (!needs_frame)
10897 framesize = 0;
/* One extra slot is reserved when the group has more than one alternative;
   it holds STR_PTR so that later alternatives can restart from it. */
10898 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
10900 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
10901 common->currententry->entry = LABEL();
10902 set_jumps(common->currententry->calls, common->currententry->entry);
/* Prologue: the return address arrives in TMP2 and is stored in the highest
   slot of the freshly allocated area. */
10904 sljit_emit_fast_enter(compiler, TMP2, 0);
10905 count_match(common);
10906 allocate_stack(common, private_data_size + framesize + alternativesize);
10907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
10908 copy_private_data(common, ccbegin, ccend, TRUE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10909 if (needs_control_head)
10910 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
/* Remember STACK_TOP; the quit and accept paths below reload it from here. */
10911 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
10912 if (needs_frame)
10913 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
10915 if (alternativesize > 0)
10916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* The body gets its own quit and accept lists. */
10918 memset(&altbacktrack, 0, sizeof(backtrack_common));
10919 common->quit_label = NULL;
10920 common->accept_label = NULL;
10921 common->quit = NULL;
10922 common->accept = NULL;
10923 altbacktrack.cc = ccbegin;
10924 cc += GET(cc, 1);
/* One iteration per alternative: matching path, jump to accept on success,
   then the backtracking path, which falls through to the next alternative. */
10925 while (1)
10927 altbacktrack.top = NULL;
10928 altbacktrack.topbacktracks = NULL;
/* Every alternative after the first restarts from the saved STR_PTR. */
10930 if (altbacktrack.cc != ccbegin)
10931 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10933 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
10934 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10935 return;
10937 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
10939 compile_backtrackingpath(common, altbacktrack.top);
10940 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10941 return;
10942 set_jumps(altbacktrack.topbacktracks, LABEL());
10944 if (*cc != OP_ALT)
10945 break;
10947 altbacktrack.cc = cc + 1 + LINK_SIZE;
10948 cc += GET(cc, 1);
10951 /* None of them matched. */
10952 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10953 jump = JUMP(SLJIT_JUMP);
/* Quit path, only emitted when something inside the body jumped to quit:
   reload STACK_TOP, revert the frame if there is one, report failure (0) and
   queue a new jump that is bound in the common epilogue below. */
10955 if (common->quit != NULL)
10957 set_jumps(common->quit, LABEL());
10958 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10959 if (needs_frame)
/* STACK_TOP is moved by the frame size only for the duration of the
   revertframes call and moved back afterwards. */
10961 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10962 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10963 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10965 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10966 common->quit = NULL;
10967 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
/* Accept path: same stack handling as the quit path, but reports success (1). */
10970 set_jumps(common->accept, LABEL());
10971 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
10972 if (needs_frame)
10974 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10975 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10976 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
10978 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
/* Common epilogue: all three outcomes arrive here with the result in TMP3. */
10980 JUMPHERE(jump);
10981 if (common->quit != NULL)
10982 set_jumps(common->quit, LABEL());
10983 copy_private_data(common, ccbegin, ccend, FALSE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
10984 free_stack(common, private_data_size + framesize + alternativesize);
/* After free_stack the saved values are addressed with negative indexes:
   -1 is the return address, the slots below it are written back to
   recursive_head_ptr and, when a control head is used, to control_head_ptr.
   The result is moved from TMP3 to TMP1 in between. */
10985 if (needs_control_head)
10987 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
10988 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
10990 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
10993 else
10995 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10996 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
10997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
10999 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
/* The helper macros used by the backtracking path generators above are not
   needed past this point. */
11002 #undef COMPILE_BACKTRACKINGPATH
11003 #undef CURRENT_AS
11005 void
11006 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
11008 struct sljit_compiler *compiler;
11009 backtrack_common rootbacktrack;
11010 compiler_common common_data;
11011 compiler_common *common = &common_data;
11012 const sljit_u8 *tables = re->tables;
11013 pcre_study_data *study;
11014 int private_data_size;
11015 pcre_uchar *ccend;
11016 executable_functions *functions;
11017 void *executable_func;
11018 sljit_uw executable_size;
11019 sljit_uw total_length;
11020 label_addr_list *label_addr;
11021 struct sljit_label *mainloop_label = NULL;
11022 struct sljit_label *continue_match_label;
11023 struct sljit_label *empty_match_found_label = NULL;
11024 struct sljit_label *empty_match_backtrack_label = NULL;
11025 struct sljit_label *reset_match_label;
11026 struct sljit_label *quit_label;
11027 struct sljit_jump *jump;
11028 struct sljit_jump *minlength_check_failed = NULL;
11029 struct sljit_jump *reqbyte_notfound = NULL;
11030 struct sljit_jump *empty_match = NULL;
11032 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
11033 study = extra->study_data;
11035 if (!tables)
11036 tables = PRIV(default_tables);
11038 memset(&rootbacktrack, 0, sizeof(backtrack_common));
11039 memset(common, 0, sizeof(compiler_common));
11040 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
11042 common->start = rootbacktrack.cc;
11043 common->read_only_data_head = NULL;
11044 common->fcc = tables + fcc_offset;
11045 common->lcc = (sljit_sw)(tables + lcc_offset);
11046 common->mode = mode;
11047 common->might_be_empty = study->minlength == 0;
11048 common->nltype = NLTYPE_FIXED;
11049 switch(re->options & PCRE_NEWLINE_BITS)
11051 case 0:
11052 /* Compile-time default */
11053 switch(NEWLINE)
11055 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11056 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11057 default: common->newline = NEWLINE; break;
11059 break;
11060 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
11061 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
11062 case PCRE_NEWLINE_CR+
11063 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
11064 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
11065 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
11066 default: return;
11068 common->nlmax = READ_CHAR_MAX;
11069 common->nlmin = 0;
11070 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
11071 common->bsr_nltype = NLTYPE_ANYCRLF;
11072 else if ((re->options & PCRE_BSR_UNICODE) != 0)
11073 common->bsr_nltype = NLTYPE_ANY;
11074 else
11076 #ifdef BSR_ANYCRLF
11077 common->bsr_nltype = NLTYPE_ANYCRLF;
11078 #else
11079 common->bsr_nltype = NLTYPE_ANY;
11080 #endif
11082 common->bsr_nlmax = READ_CHAR_MAX;
11083 common->bsr_nlmin = 0;
11084 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
11085 common->ctypes = (sljit_sw)(tables + ctypes_offset);
11086 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
11087 common->name_count = re->name_count;
11088 common->name_entry_size = re->name_entry_size;
11089 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
11090 #ifdef SUPPORT_UTF
11091 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
11092 common->utf = (re->options & PCRE_UTF8) != 0;
11093 #ifdef SUPPORT_UCP
11094 common->use_ucp = (re->options & PCRE_UCP) != 0;
11095 #endif
11096 if (common->utf)
11098 if (common->nltype == NLTYPE_ANY)
11099 common->nlmax = 0x2029;
11100 else if (common->nltype == NLTYPE_ANYCRLF)
11101 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11102 else
11104 /* We only care about the first newline character. */
11105 common->nlmax = common->newline & 0xff;
11108 if (common->nltype == NLTYPE_FIXED)
11109 common->nlmin = common->newline & 0xff;
11110 else
11111 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11113 if (common->bsr_nltype == NLTYPE_ANY)
11114 common->bsr_nlmax = 0x2029;
11115 else
11116 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
11117 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
11119 #endif /* SUPPORT_UTF */
11120 ccend = bracketend(common->start);
11122 /* Calculate the local space size on the stack. */
11123 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
11124 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
11125 if (!common->optimized_cbracket)
11126 return;
11127 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
11128 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11129 #else
11130 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
11131 #endif
11133 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
11134 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
11135 common->capture_last_ptr = common->ovector_start;
11136 common->ovector_start += sizeof(sljit_sw);
11137 #endif
11138 if (!check_opcode_types(common, common->start, ccend))
11140 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11141 return;
11144 /* Checking flags and updating ovector_start. */
11145 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11147 common->req_char_ptr = common->ovector_start;
11148 common->ovector_start += sizeof(sljit_sw);
11150 if (mode != JIT_COMPILE)
11152 common->start_used_ptr = common->ovector_start;
11153 common->ovector_start += sizeof(sljit_sw);
11154 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11156 common->hit_start = common->ovector_start;
11157 common->ovector_start += 2 * sizeof(sljit_sw);
11160 if ((re->options & PCRE_FIRSTLINE) != 0)
11162 common->match_end_ptr = common->ovector_start;
11163 common->ovector_start += sizeof(sljit_sw);
11165 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
11166 common->control_head_ptr = 1;
11167 #endif
11168 if (common->control_head_ptr != 0)
11170 common->control_head_ptr = common->ovector_start;
11171 common->ovector_start += sizeof(sljit_sw);
11173 if (common->has_set_som)
11175 /* Saving the real start pointer is necessary. */
11176 common->start_ptr = common->ovector_start;
11177 common->ovector_start += sizeof(sljit_sw);
11180 /* Aligning ovector to even number of sljit words. */
11181 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
11182 common->ovector_start += sizeof(sljit_sw);
11184 if (common->start_ptr == 0)
11185 common->start_ptr = OVECTOR(0);
11187 /* Capturing brackets cannot be optimized if callouts are allowed. */
11188 if (common->capture_last_ptr != 0)
11189 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
11191 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
11192 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
11194 total_length = ccend - common->start;
11195 common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
11196 if (!common->private_data_ptrs)
11198 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11199 return;
11201 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
11203 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
11204 set_private_data_ptrs(common, &private_data_size, ccend);
11205 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11207 if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
11208 detect_fast_fail(common, common->start, &private_data_size, 4);
11211 SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
11213 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
11215 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11216 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11217 return;
11220 if (common->has_then)
11222 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
11223 memset(common->then_offsets, 0, total_length);
11224 set_then_offsets(common, common->start, NULL);
11227 compiler = sljit_create_compiler(NULL);
11228 if (!compiler)
11230 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11231 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11232 return;
11234 common->compiler = compiler;
11236 /* Main pcre_jit_exec entry. */
11237 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
11239 /* Register init. */
11240 reset_ovector(common, (re->top_bracket + 1) * 2);
11241 if (common->req_char_ptr != 0)
11242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
11244 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
11245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
11246 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11247 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
11248 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
11249 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
11250 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
11251 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
11252 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11253 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
11255 if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
11256 reset_fast_fail(common);
11258 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11259 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11260 if (common->mark_ptr != 0)
11261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
11262 if (common->control_head_ptr != 0)
11263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
11265 /* Main part of the matching */
11266 if ((re->options & PCRE_ANCHORED) == 0)
11268 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11269 continue_match_label = LABEL();
11270 /* Forward search if possible. */
11271 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11273 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11275 else if ((re->flags & PCRE_FIRSTSET) != 0)
11276 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
11277 else if ((re->flags & PCRE_STARTLINE) != 0)
11278 fast_forward_newline(common);
11279 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
11280 fast_forward_start_bits(common, study->start_bits);
11283 else
11284 continue_match_label = LABEL();
11286 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
11288 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11289 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
11290 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
11292 if (common->req_char_ptr != 0)
11293 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
11295 /* Store the current STR_PTR in OVECTOR(0). */
11296 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11297 /* Copy the limit of allowed recursions. */
11298 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
11299 if (common->capture_last_ptr != 0)
11300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
11301 if (common->fast_forward_bc_ptr != NULL)
11302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
11304 if (common->start_ptr != OVECTOR(0))
11305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
11307 /* Copy the beginning of the string. */
11308 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11310 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
11311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
11313 JUMPHERE(jump);
11315 else if (mode == JIT_PARTIAL_HARD_COMPILE)
11316 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
11318 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
11319 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11321 sljit_free_compiler(compiler);
11322 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11323 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11324 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11325 return;
11328 if (common->might_be_empty)
11330 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11331 empty_match_found_label = LABEL();
11334 common->accept_label = LABEL();
11335 if (common->accept != NULL)
11336 set_jumps(common->accept, common->accept_label);
11338 /* This means we have a match. Update the ovector. */
11339 copy_ovector(common, re->top_bracket + 1);
11340 common->quit_label = common->forced_quit_label = LABEL();
11341 if (common->quit != NULL)
11342 set_jumps(common->quit, common->quit_label);
11343 if (common->forced_quit != NULL)
11344 set_jumps(common->forced_quit, common->forced_quit_label);
11345 if (minlength_check_failed != NULL)
11346 SET_LABEL(minlength_check_failed, common->forced_quit_label);
11347 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
11349 if (mode != JIT_COMPILE)
11351 common->partialmatchlabel = LABEL();
11352 set_jumps(common->partialmatch, common->partialmatchlabel);
11353 return_with_partial_match(common, common->quit_label);
11356 if (common->might_be_empty)
11357 empty_match_backtrack_label = LABEL();
11358 compile_backtrackingpath(common, rootbacktrack.top);
11359 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11361 sljit_free_compiler(compiler);
11362 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11363 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11364 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11365 return;
11368 SLJIT_ASSERT(rootbacktrack.prev == NULL);
11369 reset_match_label = LABEL();
11371 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11373 /* Update hit_start only in the first time. */
11374 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
11375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
11376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
11377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
11378 JUMPHERE(jump);
11381 /* Check we have remaining characters. */
11382 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
11384 SLJIT_ASSERT(common->match_end_ptr != 0);
11385 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
11388 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11389 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11391 if ((re->options & PCRE_ANCHORED) == 0)
11393 if (common->ff_newline_shortcut != NULL)
11395 if ((re->options & PCRE_FIRSTLINE) == 0)
11396 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
11397 /* There cannot be more newlines here. */
11399 else
11400 CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
11403 /* No more remaining characters. */
11404 if (reqbyte_notfound != NULL)
11405 JUMPHERE(reqbyte_notfound);
11407 if (mode == JIT_PARTIAL_SOFT_COMPILE)
11408 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
11410 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
11411 JUMPTO(SLJIT_JUMP, common->quit_label);
11413 flush_stubs(common);
11415 if (common->might_be_empty)
11417 JUMPHERE(empty_match);
11418 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11419 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
11420 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
11421 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
11422 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
11423 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
11424 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
11425 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
11428 common->fast_forward_bc_ptr = NULL;
11429 common->fast_fail_start_ptr = 0;
11430 common->fast_fail_end_ptr = 0;
11431 common->currententry = common->entries;
11432 common->local_exit = TRUE;
11433 quit_label = common->quit_label;
11434 while (common->currententry != NULL)
11436 /* Might add new entries. */
11437 compile_recurse(common);
11438 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11440 sljit_free_compiler(compiler);
11441 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11442 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11443 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11444 return;
11446 flush_stubs(common);
11447 common->currententry = common->currententry->next;
11449 common->local_exit = FALSE;
11450 common->quit_label = quit_label;
11452 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11453 /* This is a (really) rare case. */
11454 set_jumps(common->stackalloc, LABEL());
11455 /* RETURN_ADDR is not a saved register. */
11456 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11458 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
11460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STACK_TOP, 0);
11461 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
11462 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
11463 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
11464 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
11466 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
11467 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
11468 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
11469 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
11470 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
11471 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
11472 sljit_emit_fast_return(compiler, TMP1, 0);
11474 /* Allocation failed. */
11475 JUMPHERE(jump);
11476 /* We break the return address cache here, but this is a really rare case. */
11477 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
11478 JUMPTO(SLJIT_JUMP, common->quit_label);
11480 /* Call limit reached. */
11481 set_jumps(common->calllimit, LABEL());
11482 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
11483 JUMPTO(SLJIT_JUMP, common->quit_label);
11485 if (common->revertframes != NULL)
11487 set_jumps(common->revertframes, LABEL());
11488 do_revertframes(common);
11490 if (common->wordboundary != NULL)
11492 set_jumps(common->wordboundary, LABEL());
11493 check_wordboundary(common);
11495 if (common->anynewline != NULL)
11497 set_jumps(common->anynewline, LABEL());
11498 check_anynewline(common);
11500 if (common->hspace != NULL)
11502 set_jumps(common->hspace, LABEL());
11503 check_hspace(common);
11505 if (common->vspace != NULL)
11507 set_jumps(common->vspace, LABEL());
11508 check_vspace(common);
11510 if (common->casefulcmp != NULL)
11512 set_jumps(common->casefulcmp, LABEL());
11513 do_casefulcmp(common);
11515 if (common->caselesscmp != NULL)
11517 set_jumps(common->caselesscmp, LABEL());
11518 do_caselesscmp(common);
11520 if (common->reset_match != NULL)
11522 set_jumps(common->reset_match, LABEL());
11523 do_reset_match(common, (re->top_bracket + 1) * 2);
11524 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
11525 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
11526 JUMPTO(SLJIT_JUMP, reset_match_label);
11528 #ifdef SUPPORT_UTF
11529 #ifdef COMPILE_PCRE8
11530 if (common->utfreadchar != NULL)
11532 set_jumps(common->utfreadchar, LABEL());
11533 do_utfreadchar(common);
11535 if (common->utfreadchar16 != NULL)
11537 set_jumps(common->utfreadchar16, LABEL());
11538 do_utfreadchar16(common);
11540 if (common->utfreadtype8 != NULL)
11542 set_jumps(common->utfreadtype8, LABEL());
11543 do_utfreadtype8(common);
11545 #endif /* COMPILE_PCRE8 */
11546 #endif /* SUPPORT_UTF */
11547 #ifdef SUPPORT_UCP
11548 if (common->getucd != NULL)
11550 set_jumps(common->getucd, LABEL());
11551 do_getucd(common);
11553 #endif
11555 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
11556 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
11558 executable_func = sljit_generate_code(compiler);
11559 executable_size = sljit_get_generated_code_size(compiler);
11560 label_addr = common->label_addrs;
11561 while (label_addr != NULL)
11563 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
11564 label_addr = label_addr->next;
11566 sljit_free_compiler(compiler);
11567 if (executable_func == NULL)
11569 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11570 return;
11573 /* Reuse the function descriptor if possible. */
11574 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
11575 functions = (executable_functions *)extra->executable_jit;
11576 else
11578 /* Note: If your memory-checker has flagged the allocation below as a
11579 * memory leak, it is probably because you either forgot to call
11580 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11581 * pcre16_extra) object, or you called said function after having
11582 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11583 * of the object. (The function will only free the JIT data if the
11584 * bit remains set, as the bit indicates that the pointer to the data
11585 * is valid.)
11587 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
11588 if (functions == NULL)
11590 /* This case is highly unlikely since we just recently
11591 freed a lot of memory. Not impossible though. */
11592 sljit_free_code(executable_func);
11593 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
11594 return;
11596 memset(functions, 0, sizeof(executable_functions));
11597 functions->top_bracket = (re->top_bracket + 1) * 2;
11598 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
11599 extra->executable_jit = functions;
11600 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
11603 functions->executable_funcs[mode] = executable_func;
11604 functions->read_only_data_heads[mode] = common->read_only_data_head;
11605 functions->executable_sizes[mode] = executable_size;
11608 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
11610 union {
11611 void *executable_func;
11612 jit_function call_executable_func;
11613 } convert_executable_func;
11614 sljit_u8 local_space[MACHINE_STACK_SIZE];
11615 struct sljit_stack local_stack;
11617 local_stack.min_start = local_space;
11618 local_stack.start = local_space;
11619 local_stack.end = local_space + MACHINE_STACK_SIZE;
11620 local_stack.top = local_space + MACHINE_STACK_SIZE;
11621 arguments->stack = &local_stack;
11622 convert_executable_func.executable_func = executable_func;
11623 return convert_executable_func.call_executable_func(arguments);
11627 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
11628 int length, int start_offset, int options, int *offsets, int offset_count)
11630 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
11631 union {
11632 void *executable_func;
11633 jit_function call_executable_func;
11634 } convert_executable_func;
11635 jit_arguments arguments;
11636 int max_offset_count;
11637 int retval;
11638 int mode = JIT_COMPILE;
11640 if ((options & PCRE_PARTIAL_HARD) != 0)
11641 mode = JIT_PARTIAL_HARD_COMPILE;
11642 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11643 mode = JIT_PARTIAL_SOFT_COMPILE;
11645 if (functions->executable_funcs[mode] == NULL)
11646 return PCRE_ERROR_JIT_BADOPTION;
11648 /* Sanity checks should be handled by pcre_exec. */
11649 arguments.str = subject + start_offset;
11650 arguments.begin = subject;
11651 arguments.end = subject + length;
11652 arguments.mark_ptr = NULL;
11653 /* JIT decreases this value less frequently than the interpreter. */
11654 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11655 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11656 arguments.limit_match = functions->limit_match;
11657 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11658 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11659 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11660 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11661 arguments.offsets = offsets;
11662 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11663 arguments.real_offset_count = offset_count;
11665 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11666 the output vector for storing captured strings, with the remainder used as
11667 workspace. We don't need the workspace here. For compatibility, we limit the
11668 number of captured strings in the same way as pcre_exec(), so that the user
11669 gets the same result with and without JIT. */
11671 if (offset_count != 2)
11672 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11673 max_offset_count = functions->top_bracket;
11674 if (offset_count > max_offset_count)
11675 offset_count = max_offset_count;
11676 arguments.offset_count = offset_count;
11678 if (functions->callback)
11679 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
11680 else
11681 arguments.stack = (struct sljit_stack *)functions->userdata;
11683 if (arguments.stack == NULL)
11684 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
11685 else
11687 convert_executable_func.executable_func = functions->executable_funcs[mode];
11688 retval = convert_executable_func.call_executable_func(&arguments);
11691 if (retval * 2 > offset_count)
11692 retval = 0;
11693 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11694 *(extra_data->mark) = arguments.mark_ptr;
11696 return retval;
11699 #if defined COMPILE_PCRE8
11700 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11701 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
11702 PCRE_SPTR subject, int length, int start_offset, int options,
11703 int *offsets, int offset_count, pcre_jit_stack *stack)
11704 #elif defined COMPILE_PCRE16
11705 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11706 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
11707 PCRE_SPTR16 subject, int length, int start_offset, int options,
11708 int *offsets, int offset_count, pcre16_jit_stack *stack)
11709 #elif defined COMPILE_PCRE32
11710 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
11711 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
11712 PCRE_SPTR32 subject, int length, int start_offset, int options,
11713 int *offsets, int offset_count, pcre32_jit_stack *stack)
11714 #endif
11716 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
11717 executable_functions *functions = (executable_functions *)(extra_data? extra_data->executable_jit : NULL);
11718 union {
11719 void *executable_func;
11720 jit_function call_executable_func;
11721 } convert_executable_func;
11722 jit_arguments arguments;
11723 int max_offset_count;
11724 int retval;
11725 int mode = JIT_COMPILE;
11727 SLJIT_UNUSED_ARG(argument_re);
11729 /* Plausibility checks */
11730 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
11732 if ((options & PCRE_PARTIAL_HARD) != 0)
11733 mode = JIT_PARTIAL_HARD_COMPILE;
11734 else if ((options & PCRE_PARTIAL_SOFT) != 0)
11735 mode = JIT_PARTIAL_SOFT_COMPILE;
11737 if (functions == NULL || functions->executable_funcs[mode] == NULL)
11738 return
11739 #if defined COMPILE_PCRE8
11740 pcre_exec
11741 #elif defined COMPILE_PCRE16
11742 pcre16_exec
11743 #elif defined COMPILE_PCRE32
11744 pcre32_exec
11745 #endif
11746 (argument_re, extra_data, subject, length, start_offset, options, offsets, offset_count);
11748 /* Sanity checks should be handled by pcre_exec. */
11749 arguments.stack = (struct sljit_stack *)stack;
11750 arguments.str = subject_ptr + start_offset;
11751 arguments.begin = subject_ptr;
11752 arguments.end = subject_ptr + length;
11753 arguments.mark_ptr = NULL;
11754 /* JIT decreases this value less frequently than the interpreter. */
11755 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
11756 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
11757 arguments.limit_match = functions->limit_match;
11758 arguments.notbol = (options & PCRE_NOTBOL) != 0;
11759 arguments.noteol = (options & PCRE_NOTEOL) != 0;
11760 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
11761 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
11762 arguments.offsets = offsets;
11763 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
11764 arguments.real_offset_count = offset_count;
11766 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11767 the output vector for storing captured strings, with the remainder used as
11768 workspace. We don't need the workspace here. For compatibility, we limit the
11769 number of captured strings in the same way as pcre_exec(), so that the user
11770 gets the same result with and without JIT. */
11772 if (offset_count != 2)
11773 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
11774 max_offset_count = functions->top_bracket;
11775 if (offset_count > max_offset_count)
11776 offset_count = max_offset_count;
11777 arguments.offset_count = offset_count;
11779 convert_executable_func.executable_func = functions->executable_funcs[mode];
11780 retval = convert_executable_func.call_executable_func(&arguments);
11782 if (retval * 2 > offset_count)
11783 retval = 0;
11784 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
11785 *(extra_data->mark) = arguments.mark_ptr;
11787 return retval;
11790 void
11791 PRIV(jit_free)(void *executable_funcs)
11793 int i;
11794 executable_functions *functions = (executable_functions *)executable_funcs;
11795 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11797 if (functions->executable_funcs[i] != NULL)
11798 sljit_free_code(functions->executable_funcs[i]);
11799 free_read_only_data(functions->read_only_data_heads[i], NULL);
11801 SLJIT_FREE(functions, compiler->allocator_data);
11805 PRIV(jit_get_size)(void *executable_funcs)
11807 int i;
11808 sljit_uw size = 0;
11809 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
11810 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
11811 size += executable_sizes[i];
11812 return (int)size;
11815 const char*
11816 PRIV(jit_get_target)(void)
11818 return sljit_get_platform_name();
11821 #if defined COMPILE_PCRE8
11822 PCRE_EXP_DECL pcre_jit_stack *
11823 pcre_jit_stack_alloc(int startsize, int maxsize)
11824 #elif defined COMPILE_PCRE16
11825 PCRE_EXP_DECL pcre16_jit_stack *
11826 pcre16_jit_stack_alloc(int startsize, int maxsize)
11827 #elif defined COMPILE_PCRE32
11828 PCRE_EXP_DECL pcre32_jit_stack *
11829 pcre32_jit_stack_alloc(int startsize, int maxsize)
11830 #endif
11832 if (startsize < 1 || maxsize < 1)
11833 return NULL;
11834 if (startsize > maxsize)
11835 startsize = maxsize;
11836 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11837 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
11838 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
11841 #if defined COMPILE_PCRE8
11842 PCRE_EXP_DECL void
11843 pcre_jit_stack_free(pcre_jit_stack *stack)
11844 #elif defined COMPILE_PCRE16
11845 PCRE_EXP_DECL void
11846 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11847 #elif defined COMPILE_PCRE32
11848 PCRE_EXP_DECL void
11849 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11850 #endif
11852 sljit_free_stack((struct sljit_stack *)stack, NULL);
11855 #if defined COMPILE_PCRE8
11856 PCRE_EXP_DECL void
11857 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11858 #elif defined COMPILE_PCRE16
11859 PCRE_EXP_DECL void
11860 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11861 #elif defined COMPILE_PCRE32
11862 PCRE_EXP_DECL void
11863 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11864 #endif
11866 executable_functions *functions;
11867 if (extra != NULL &&
11868 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
11869 extra->executable_jit != NULL)
11871 functions = (executable_functions *)extra->executable_jit;
11872 functions->callback = callback;
11873 functions->userdata = userdata;
11877 #if defined COMPILE_PCRE8
11878 PCRE_EXP_DECL void
11879 pcre_jit_free_unused_memory(void)
11880 #elif defined COMPILE_PCRE16
11881 PCRE_EXP_DECL void
11882 pcre16_jit_free_unused_memory(void)
11883 #elif defined COMPILE_PCRE32
11884 PCRE_EXP_DECL void
11885 pcre32_jit_free_unused_memory(void)
11886 #endif
11888 sljit_free_unused_memory_exec();
11891 #else /* SUPPORT_JIT */
11893 /* These are dummy functions to avoid linking errors when JIT support is not
11894 being compiled. */
11896 static const void *const dummy_stack = NULL;
11898 #if defined COMPILE_PCRE8
11899 #define DUMMY_STACK (pcre_jit_stack *)&dummy_stack
11900 PCRE_EXP_DECL pcre_jit_stack *
11901 pcre_jit_stack_alloc(int startsize, int maxsize)
11902 #elif defined COMPILE_PCRE16
11903 #define DUMMY_STACK (pcre16_jit_stack *)&dummy_stack
11904 PCRE_EXP_DECL pcre16_jit_stack *
11905 pcre16_jit_stack_alloc(int startsize, int maxsize)
11906 #elif defined COMPILE_PCRE32
11907 #define DUMMY_STACK (pcre32_jit_stack *)&dummy_stack
11908 PCRE_EXP_DECL pcre32_jit_stack *
11909 pcre32_jit_stack_alloc(int startsize, int maxsize)
11910 #endif
11912 (void)startsize;
11913 (void)maxsize;
11914 return DUMMY_STACK;
11917 #if defined COMPILE_PCRE8
11918 PCRE_EXP_DECL void
11919 pcre_jit_stack_free(pcre_jit_stack *stack)
11920 #elif defined COMPILE_PCRE16
11921 PCRE_EXP_DECL void
11922 pcre16_jit_stack_free(pcre16_jit_stack *stack)
11923 #elif defined COMPILE_PCRE32
11924 PCRE_EXP_DECL void
11925 pcre32_jit_stack_free(pcre32_jit_stack *stack)
11926 #endif
11928 (void)stack;
11931 #if defined COMPILE_PCRE8
11932 PCRE_EXP_DECL void
11933 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
11934 #elif defined COMPILE_PCRE16
11935 PCRE_EXP_DECL void
11936 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
11937 #elif defined COMPILE_PCRE32
11938 PCRE_EXP_DECL void
11939 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
11940 #endif
11942 (void)extra;
11943 (void)callback;
11944 (void)userdata;
11947 #if defined COMPILE_PCRE8
11948 PCRE_EXP_DECL void
11949 pcre_jit_free_unused_memory(void)
11950 #elif defined COMPILE_PCRE16
11951 PCRE_EXP_DECL void
11952 pcre16_jit_free_unused_memory(void)
11953 #elif defined COMPILE_PCRE32
11954 PCRE_EXP_DECL void
11955 pcre32_jit_free_unused_memory(void)
11956 #endif
11960 #endif
11962 /* End of pcre_jit_compile.c */