1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
47 #include "pcre_internal.h"
49 #if !defined SUPPORT_JIT
51 /* Stubs for clients compiled against pcre_jit_exec (and friends)
52 so they gracefully fall back to non-JIT. */
54 #if defined COMPILE_PCRE8
55 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
56 pcre_jit_exec(const pcre
*argument_re
, const pcre_extra
*extra_data
,
57 PCRE_SPTR subject
, int length
, int start_offset
, int options
,
58 int *offsets
, int offset_count
, pcre_jit_stack
*stack
)
59 #elif defined COMPILE_PCRE16
60 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
61 pcre16_jit_exec(const pcre16
*argument_re
, const pcre16_extra
*extra_data
,
62 PCRE_SPTR16 subject
, int length
, int start_offset
, int options
,
63 int *offsets
, int offset_count
, pcre16_jit_stack
*stack
)
64 #elif defined COMPILE_PCRE32
65 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
66 pcre32_jit_exec(const pcre32
*argument_re
, const pcre32_extra
*extra_data
,
67 PCRE_SPTR32 subject
, int length
, int start_offset
, int options
,
68 int *offsets
, int offset_count
, pcre32_jit_stack
*stack
)
73 #if defined COMPILE_PCRE8
75 #elif defined COMPILE_PCRE16
77 #elif defined COMPILE_PCRE32
80 (argument_re
, extra_data
, subject
, length
, start_offset
, options
, offsets
, offset_count
);
83 #endif /* !SUPPORT_JIT stubs */
85 #if defined SUPPORT_JIT
87 /* All-in-one: Since we use the JIT compiler only from here,
88 we just include it. This way we don't need to touch the build
91 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
92 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
93 #define SLJIT_CONFIG_AUTO 1
94 #define SLJIT_CONFIG_STATIC 1
95 #define SLJIT_VERBOSE 0
98 #include "sljit/sljitLir.c"
100 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
101 #error Unsupported architecture
104 /* Defines for debugging purposes. */
106 /* 1 - Use unoptimized capturing brackets.
107 2 - Enable capture_last_ptr (includes option 1). */
108 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
110 /* 1 - Always have a control head. */
111 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
113 /* Allocate memory for the regex stack on the real machine stack.
114 Fast, but limited size. */
115 #define MACHINE_STACK_SIZE 32768
117 /* Growth rate for stack allocated by the OS. Should be the multiply
119 #define STACK_GROWTH_RATE 8192
121 /* Enable to check that the allocation could destroy temporaries. */
122 #if defined SLJIT_DEBUG && SLJIT_DEBUG
123 #define DESTROY_REGISTERS 1
127 Short summary about the backtracking mechanism employed by the jit code generator:
129 The code generator follows the recursive nature of the PERL compatible regular
130 expressions. The basic blocks of regular expressions are condition checkers
131 which execute different commands depending on the result of the condition check.
132 The relationship between the operators can be horizontal (concatenation) and
133 vertical (sub-expression) (See struct backtrack_common for more details).
135 'ab' - 'a' and 'b' regexps are concatenated
136 'a+' - 'a' is the sub-expression of the '+' operator
138 The condition checkers are boolean (true/false) checkers. Machine code is generated
139 for the checker itself and for the actions depending on the result of the checker.
140 The 'true' case is called the matching path (expected path), and the other is called
141 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
142 branches on the matching path.
144 Greedy star operator (*) :
145 Matching path: match happens.
146 Backtrack path: match failed.
147 Non-greedy star operator (*?) :
148 Matching path: no need to perform a match.
149 Backtrack path: match is required.
151 The following example shows the code generated for a capturing bracket
152 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
153 we have the following regular expression:
157 The generated code will be the following:
160 '(' matching path (pushing arguments to the stack)
162 ')' matching path (pushing arguments to the stack)
164 return with successful match
167 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
170 jump to D matching path
174 Notice that the order of the backtrack code paths is the opposite of the fast
175 code paths. In this way the topmost value on the stack always belongs
176 to the current backtrack code path. The backtrack path must check
177 whether there is a next alternative. If so, it needs to jump back to
178 the matching path eventually. Otherwise it needs to clear out its own stack
179 frame and continue the execution on the backtrack code paths.
185 Atomic blocks and asserts require reloading the values of private data
186 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
187 are not necessarily known at compile time, thus we need a dynamic restore
190 The stack frames are stored in a chain list, and have the following format:
191 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
193 Thus we can restore the private data to a particular point in the stack.
196 typedef struct jit_arguments
{
197 /* Pointers first. */
198 struct sljit_stack
*stack
;
199 const pcre_uchar
*str
;
200 const pcre_uchar
*begin
;
201 const pcre_uchar
*end
;
203 pcre_uchar
*mark_ptr
;
205 /* Everything else after. */
206 sljit_u32 limit_match
;
207 int real_offset_count
;
212 sljit_u8 notempty_atstart
;
215 typedef struct executable_functions
{
216 void *executable_funcs
[JIT_NUMBER_OF_COMPILE_MODES
];
217 void *read_only_data_heads
[JIT_NUMBER_OF_COMPILE_MODES
];
218 sljit_uw executable_sizes
[JIT_NUMBER_OF_COMPILE_MODES
];
219 PUBL(jit_callback
) callback
;
221 sljit_u32 top_bracket
;
222 sljit_u32 limit_match
;
223 } executable_functions
;
225 typedef struct jump_list
{
226 struct sljit_jump
*jump
;
227 struct jump_list
*next
;
230 typedef struct stub_list
{
231 struct sljit_jump
*start
;
232 struct sljit_label
*quit
;
233 struct stub_list
*next
;
236 typedef struct label_addr_list
{
237 struct sljit_label
*label
;
238 sljit_uw
*update_addr
;
239 struct label_addr_list
*next
;
252 typedef int (SLJIT_FUNC
*jit_function
)(jit_arguments
*args
);
254 /* The following structure is the key data type for the recursive
255 code generator. It is allocated by compile_matchingpath, and contains
256 the arguments for compile_backtrackingpath. Must be the first member
257 of its descendants. */
258 typedef struct backtrack_common
{
259 /* Concatenation stack. */
260 struct backtrack_common
*prev
;
261 jump_list
*nextbacktracks
;
262 /* Internal stack (for component operators). */
263 struct backtrack_common
*top
;
264 jump_list
*topbacktracks
;
265 /* Opcode pointer. */
269 typedef struct assert_backtrack
{
270 backtrack_common common
;
271 jump_list
*condfailed
;
272 /* Less than 0 if a frame is not needed. */
274 /* Points to our private memory word on the stack. */
275 int private_data_ptr
;
277 struct sljit_label
*matchingpath
;
280 typedef struct bracket_backtrack
{
281 backtrack_common common
;
282 /* Where to continue if an alternative is successfully matched. */
283 struct sljit_label
*alternative_matchingpath
;
284 /* For rmin and rmax iterators. */
285 struct sljit_label
*recursive_matchingpath
;
286 /* For greedy ? operator. */
287 struct sljit_label
*zero_matchingpath
;
288 /* Contains the branches of a failed condition. */
290 /* Both for OP_COND, OP_SCOND. */
291 jump_list
*condfailed
;
292 assert_backtrack
*assert;
293 /* For OP_ONCE. Less than 0 if not needed. */
296 /* Points to our private memory word on the stack. */
297 int private_data_ptr
;
300 typedef struct bracketpos_backtrack
{
301 backtrack_common common
;
302 /* Points to our private memory word on the stack. */
303 int private_data_ptr
;
304 /* Reverting stack is needed. */
306 /* Allocated stack size. */
308 } bracketpos_backtrack
;
310 typedef struct braminzero_backtrack
{
311 backtrack_common common
;
312 struct sljit_label
*matchingpath
;
313 } braminzero_backtrack
;
315 typedef struct char_iterator_backtrack
{
316 backtrack_common common
;
317 /* Next iteration. */
318 struct sljit_label
*matchingpath
;
320 jump_list
*backtracks
;
322 unsigned int othercasebit
;
327 } char_iterator_backtrack
;
329 typedef struct ref_iterator_backtrack
{
330 backtrack_common common
;
331 /* Next iteration. */
332 struct sljit_label
*matchingpath
;
333 } ref_iterator_backtrack
;
335 typedef struct recurse_entry
{
336 struct recurse_entry
*next
;
337 /* Contains the function entry. */
338 struct sljit_label
*entry
;
339 /* Collects the calls until the function is created. */
341 /* Points to the starting opcode. */
345 typedef struct recurse_backtrack
{
346 backtrack_common common
;
347 BOOL inlined_pattern
;
350 #define OP_THEN_TRAP OP_TABLE_LENGTH
352 typedef struct then_trap_backtrack
{
353 backtrack_common common
;
354 /* If then_trap is not NULL, this structure contains the real
355 then_trap for the backtracking path. */
356 struct then_trap_backtrack
*then_trap
;
357 /* Points to the starting opcode. */
359 /* Exit point for the then opcodes of this alternative. */
361 /* Frame size of the current alternative. */
363 } then_trap_backtrack
;
365 #define MAX_RANGE_SIZE 4
367 typedef struct compiler_common
{
368 /* The sljit generic compiler. */
369 struct sljit_compiler
*compiler
;
370 /* First byte code. */
372 /* Maps private data offset to each opcode. */
373 sljit_s32
*private_data_ptrs
;
374 /* Chain list of read-only data ptrs. */
375 void *read_only_data_head
;
376 /* Tells whether the capturing bracket is optimized. */
377 sljit_u8
*optimized_cbracket
;
378 /* Tells whether the starting offset is a target of then. */
379 sljit_u8
*then_offsets
;
380 /* Current position where a THEN must jump. */
381 then_trap_backtrack
*then_trap
;
382 /* Starting offset of private data for capturing brackets. */
384 /* Output vector starting point. Must be divisible by 2. */
385 sljit_s32 ovector_start
;
386 /* Points to the starting character of the current match. */
388 /* Last known position of the requested byte. */
389 sljit_s32 req_char_ptr
;
390 /* Head of the last recursion. */
391 sljit_s32 recursive_head_ptr
;
392 /* First inspected character for partial matching.
393 (Needed for avoiding zero length partial matches.) */
394 sljit_s32 start_used_ptr
;
395 /* Starting pointer for partial soft matches. */
397 /* Pointer of the match end position. */
398 sljit_s32 match_end_ptr
;
399 /* Points to the marked string. */
401 /* Recursive control verb management chain. */
402 sljit_s32 control_head_ptr
;
403 /* Points to the last matched capture block index. */
404 sljit_s32 capture_last_ptr
;
405 /* Fast forward skipping byte code pointer. */
406 pcre_uchar
*fast_forward_bc_ptr
;
407 /* Locals used by fast fail optimization. */
408 sljit_s32 fast_fail_start_ptr
;
409 sljit_s32 fast_fail_end_ptr
;
411 /* Flipped and lower case tables. */
414 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
416 /* TRUE, when minlength is greater than 0. */
418 /* \K is found in the pattern. */
420 /* (*SKIP:arg) is found in the pattern. */
422 /* (*THEN) is found in the pattern. */
424 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
425 BOOL has_skip_in_assert_back
;
426 /* Currently in recurse or negative assert. */
428 /* Currently in a positive assert. */
429 BOOL positive_assert
;
430 /* Newline control. */
438 /* Dollar endonly. */
442 /* Named capturing brackets. */
443 pcre_uchar
*name_table
;
445 sljit_sw name_entry_size
;
447 /* Labels and jump lists. */
448 struct sljit_label
*partialmatchlabel
;
449 struct sljit_label
*quit_label
;
450 struct sljit_label
*forced_quit_label
;
451 struct sljit_label
*accept_label
;
452 struct sljit_label
*ff_newline_shortcut
;
454 label_addr_list
*label_addrs
;
455 recurse_entry
*entries
;
456 recurse_entry
*currententry
;
457 jump_list
*partialmatch
;
459 jump_list
*positive_assert_quit
;
460 jump_list
*forced_quit
;
462 jump_list
*calllimit
;
463 jump_list
*stackalloc
;
464 jump_list
*revertframes
;
465 jump_list
*wordboundary
;
466 jump_list
*anynewline
;
469 jump_list
*casefulcmp
;
470 jump_list
*caselesscmp
;
471 jump_list
*reset_match
;
480 jump_list
*utfreadchar
;
481 jump_list
*utfreadchar16
;
482 jump_list
*utfreadtype8
;
484 #endif /* SUPPORT_UTF */
487 /* For byte_sequence_compare. */
489 typedef struct compare_context
{
492 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
497 #if defined COMPILE_PCRE8
499 sljit_u8 asuchars
[4];
500 #elif defined COMPILE_PCRE16
501 sljit_u16 asuchars
[2];
502 #elif defined COMPILE_PCRE32
503 sljit_u32 asuchars
[1];
509 #if defined COMPILE_PCRE8
511 sljit_u8 asuchars
[4];
512 #elif defined COMPILE_PCRE16
513 sljit_u16 asuchars
[2];
514 #elif defined COMPILE_PCRE32
515 sljit_u32 asuchars
[1];
521 /* Undefine sljit macros. */
524 /* Used for accessing the elements of the stack. */
525 #define STACK(i) ((i) * (int)sizeof(sljit_sw))
527 #ifdef SLJIT_PREF_SHIFT_REG
528 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
530 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
531 #define SHIFT_REG_IS_R3
533 #error "Unsupported shift register"
537 #define TMP1 SLJIT_R0
538 #ifdef SHIFT_REG_IS_R3
539 #define TMP2 SLJIT_R3
540 #define TMP3 SLJIT_R2
542 #define TMP2 SLJIT_R2
543 #define TMP3 SLJIT_R3
545 #define STR_PTR SLJIT_S0
546 #define STR_END SLJIT_S1
547 #define STACK_TOP SLJIT_R1
548 #define STACK_LIMIT SLJIT_S2
549 #define COUNT_MATCH SLJIT_S3
550 #define ARGUMENTS SLJIT_S4
551 #define RETURN_ADDR SLJIT_R4
553 /* Local space layout. */
554 /* These two locals can be used by the current opcode. */
555 #define LOCALS0 (0 * sizeof(sljit_sw))
556 #define LOCALS1 (1 * sizeof(sljit_sw))
557 /* Two local variables for possessive quantifiers (char1 cannot use them). */
558 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
559 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
560 /* Max limit of recursions. */
561 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
562 /* The output vector is stored on the stack, and contains pointers
563 to characters. The vector data is divided into two groups: the first
564 group contains the start / end character pointers, and the second is
565 the start pointers when the end of the capturing group has not yet been reached. */
566 #define OVECTOR_START (common->ovector_start)
567 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
568 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
569 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
571 #if defined COMPILE_PCRE8
572 #define MOV_UCHAR SLJIT_MOV_U8
573 #elif defined COMPILE_PCRE16
574 #define MOV_UCHAR SLJIT_MOV_U16
575 #elif defined COMPILE_PCRE32
576 #define MOV_UCHAR SLJIT_MOV_U32
578 #error Unsupported compiling mode
582 #define DEFINE_COMPILER \
583 struct sljit_compiler *compiler = common->compiler
584 #define OP1(op, dst, dstw, src, srcw) \
585 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
586 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
587 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
589 sljit_emit_label(compiler)
591 sljit_emit_jump(compiler, (type))
592 #define JUMPTO(type, label) \
593 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
594 #define JUMPHERE(jump) \
595 sljit_set_label((jump), sljit_emit_label(compiler))
596 #define SET_LABEL(jump, label) \
597 sljit_set_label((jump), (label))
598 #define CMP(type, src1, src1w, src2, src2w) \
599 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
600 #define CMPTO(type, src1, src1w, src2, src2w, label) \
601 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
602 #define OP_FLAGS(op, dst, dstw, type) \
603 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
604 #define GET_LOCAL_BASE(dst, dstw, offset) \
605 sljit_get_local_base(compiler, (dst), (dstw), (offset))
607 #define READ_CHAR_MAX 0x7fffffff
609 #define INVALID_UTF_CHAR 888
611 static pcre_uchar
*bracketend(pcre_uchar
*cc
)
613 SLJIT_ASSERT((*cc
>= OP_ASSERT
&& *cc
<= OP_ASSERTBACK_NOT
) || (*cc
>= OP_ONCE
&& *cc
<= OP_SCOND
));
614 do cc
+= GET(cc
, 1); while (*cc
== OP_ALT
);
615 SLJIT_ASSERT(*cc
>= OP_KET
&& *cc
<= OP_KETRPOS
);
620 static int no_alternatives(pcre_uchar
*cc
)
623 SLJIT_ASSERT((*cc
>= OP_ASSERT
&& *cc
<= OP_ASSERTBACK_NOT
) || (*cc
>= OP_ONCE
&& *cc
<= OP_SCOND
));
629 while (*cc
== OP_ALT
);
630 SLJIT_ASSERT(*cc
>= OP_KET
&& *cc
<= OP_KETRPOS
);
634 /* Functions which might need modification for all new supported opcodes:
637 set_private_data_ptrs
640 get_private_data_copy_length
643 compile_backtrackingpath
646 static pcre_uchar
*next_opcode(compiler_common
*common
, pcre_uchar
*cc
)
648 SLJIT_UNUSED_ARG(common
);
654 case OP_NOT_WORD_BOUNDARY
:
655 case OP_WORD_BOUNDARY
:
658 case OP_NOT_WHITESPACE
:
660 case OP_NOT_WORDCHAR
:
707 case OP_ASSERTBACK_NOT
:
734 case OP_ASSERT_ACCEPT
:
737 return cc
+ PRIV(OP_lengths
)[*cc
];
787 case OP_NOTMINQUERYI
:
793 case OP_NOTPOSQUERYI
:
795 cc
+= PRIV(OP_lengths
)[*cc
];
797 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
807 case OP_TYPEMINQUERY
:
813 case OP_TYPEPOSQUERY
:
815 return cc
+ PRIV(OP_lengths
)[*cc
] - 1;
819 if (common
->utf
) return NULL
;
823 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
825 return cc
+ GET(cc
, 1);
832 return cc
+ 1 + 2 + cc
[1];
835 /* All opcodes are supported now! */
841 static BOOL
check_opcode_types(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
)
845 pcre_uchar
*assert_back_end
= cc
- 1;
847 /* Calculate important variables (like stack size) and check whether all opcodes are supported. */
853 common
->has_set_som
= TRUE
;
854 common
->might_be_empty
= TRUE
;
860 common
->optimized_cbracket
[GET2(cc
, 1)] = 0;
866 common
->optimized_cbracket
[GET2(cc
, 1 + LINK_SIZE
)] = 0;
867 cc
+= 1 + LINK_SIZE
+ IMM2_SIZE
;
872 /* Only AUTO_CALLOUT can insert this opcode. We do
873 not intend to support this case. */
874 if (cc
[1 + LINK_SIZE
] == OP_CALLOUT
)
880 common
->optimized_cbracket
[GET2(cc
, 1)] = 0;
887 count
= GET2(cc
, 1 + IMM2_SIZE
);
888 slot
= common
->name_table
+ GET2(cc
, 1) * common
->name_entry_size
;
891 common
->optimized_cbracket
[GET2(slot
, 0)] = 0;
892 slot
+= common
->name_entry_size
;
894 cc
+= 1 + 2 * IMM2_SIZE
;
898 /* Set its value only once. */
899 if (common
->recursive_head_ptr
== 0)
901 common
->recursive_head_ptr
= common
->ovector_start
;
902 common
->ovector_start
+= sizeof(sljit_sw
);
908 if (common
->capture_last_ptr
== 0)
910 common
->capture_last_ptr
= common
->ovector_start
;
911 common
->ovector_start
+= sizeof(sljit_sw
);
913 cc
+= 2 + 2 * LINK_SIZE
;
917 slot
= bracketend(cc
);
918 if (slot
> assert_back_end
)
919 assert_back_end
= slot
;
924 common
->has_then
= TRUE
;
925 common
->control_head_ptr
= 1;
930 if (common
->mark_ptr
== 0)
932 common
->mark_ptr
= common
->ovector_start
;
933 common
->ovector_start
+= sizeof(sljit_sw
);
939 common
->has_then
= TRUE
;
940 common
->control_head_ptr
= 1;
945 if (cc
< assert_back_end
)
946 common
->has_skip_in_assert_back
= TRUE
;
951 common
->control_head_ptr
= 1;
952 common
->has_skip_arg
= TRUE
;
953 if (cc
< assert_back_end
)
954 common
->has_skip_in_assert_back
= TRUE
;
959 cc
= next_opcode(common
, cc
);
968 static BOOL
is_accelerated_repeat(pcre_uchar
*cc
)
978 return (cc
[1] != OP_ANYNL
&& cc
[1] != OP_EXTUNI
);
1002 case OP_NOTMINSTARI
:
1004 case OP_NOTMINPLUSI
:
1005 case OP_NOTPOSSTARI
:
1006 case OP_NOTPOSPLUSI
:
1011 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1013 cc
+= (*cc
== OP_XCLASS
) ? GET(cc
, 1) : (int)(1 + (32 / sizeof(pcre_uchar
)));
1015 cc
+= (1 + (32 / sizeof(pcre_uchar
)));
1033 static SLJIT_INLINE BOOL
detect_fast_forward_skip(compiler_common
*common
, int *private_data_start
)
1035 pcre_uchar
*cc
= common
->start
;
1038 /* Skip not repeated brackets. */
1046 case OP_NOT_WORD_BOUNDARY
:
1047 case OP_WORD_BOUNDARY
:
1054 /* Zero width assertions. */
1059 if (*cc
!= OP_BRA
&& *cc
!= OP_CBRA
)
1062 end
= cc
+ GET(cc
, 1);
1063 if (*end
!= OP_KET
|| PRIVATE_DATA(end
) != 0)
1067 if (common
->optimized_cbracket
[GET2(cc
, 1 + LINK_SIZE
)] == 0)
1071 cc
+= 1 + LINK_SIZE
;
1074 if (is_accelerated_repeat(cc
))
1076 common
->fast_forward_bc_ptr
= cc
;
1077 common
->private_data_ptrs
[(cc
+ 1) - common
->start
] = *private_data_start
;
1078 *private_data_start
+= sizeof(sljit_sw
);
1084 static SLJIT_INLINE
void detect_fast_fail(compiler_common
*common
, pcre_uchar
*cc
, int *private_data_start
, sljit_s32 depth
)
1086 pcre_uchar
*next_alt
;
1088 SLJIT_ASSERT(*cc
== OP_BRA
|| *cc
== OP_CBRA
);
1090 if (*cc
== OP_CBRA
&& common
->optimized_cbracket
[GET2(cc
, 1 + LINK_SIZE
)] == 0)
1093 next_alt
= bracketend(cc
) - (1 + LINK_SIZE
);
1094 if (*next_alt
!= OP_KET
|| PRIVATE_DATA(next_alt
) != 0)
1099 next_alt
= cc
+ GET(cc
, 1);
1101 cc
+= 1 + LINK_SIZE
+ ((*cc
== OP_CBRA
) ? IMM2_SIZE
: 0);
1110 case OP_NOT_WORD_BOUNDARY
:
1111 case OP_WORD_BOUNDARY
:
1118 /* Zero width assertions. */
1125 if (depth
> 0 && (*cc
== OP_BRA
|| *cc
== OP_CBRA
))
1126 detect_fast_fail(common
, cc
, private_data_start
, depth
- 1);
1128 if (is_accelerated_repeat(cc
))
1130 common
->private_data_ptrs
[(cc
+ 1) - common
->start
] = *private_data_start
;
1132 if (common
->fast_fail_start_ptr
== 0)
1133 common
->fast_fail_start_ptr
= *private_data_start
;
1135 *private_data_start
+= sizeof(sljit_sw
);
1136 common
->fast_fail_end_ptr
= *private_data_start
;
1138 if (*private_data_start
> SLJIT_MAX_LOCAL_SIZE
)
1144 while (*cc
== OP_ALT
);
1147 static int get_class_iterator_size(pcre_uchar
*cc
)
1166 max
= GET2(cc
, 1 + IMM2_SIZE
);
1168 return (*cc
== OP_CRRANGE
) ? 2 : 1;
1179 static BOOL
detect_repeat(compiler_common
*common
, pcre_uchar
*begin
)
1181 pcre_uchar
*end
= bracketend(begin
);
1183 pcre_uchar
*next_end
;
1184 pcre_uchar
*max_end
;
1186 sljit_sw length
= end
- begin
;
1189 /* Detect fixed iterations first. */
1190 if (end
[-(1 + LINK_SIZE
)] != OP_KET
)
1193 /* Already detected repeat. */
1194 if (common
->private_data_ptrs
[end
- common
->start
- LINK_SIZE
] != 0)
1201 if (*next
!= *begin
)
1203 next_end
= bracketend(next
);
1204 if (next_end
- next
!= length
|| memcmp(begin
, next
, IN_UCHARS(length
)) != 0)
1215 if (*next
== OP_BRAZERO
|| *next
== OP_BRAMINZERO
)
1220 if (next
[0] != type
|| next
[1] != OP_BRA
|| next
[2 + LINK_SIZE
] != *begin
)
1222 next_end
= bracketend(next
+ 2 + LINK_SIZE
);
1223 if (next_end
- next
!= (length
+ 2 + LINK_SIZE
) || memcmp(begin
, next
+ 2 + LINK_SIZE
, IN_UCHARS(length
)) != 0)
1229 if (next
[0] == type
&& next
[1] == *begin
&& max
>= 1)
1231 next_end
= bracketend(next
+ 1);
1232 if (next_end
- next
== (length
+ 1) && memcmp(begin
, next
+ 1, IN_UCHARS(length
)) == 0)
1234 for (i
= 0; i
< max
; i
++, next_end
+= 1 + LINK_SIZE
)
1235 if (*next_end
!= OP_KET
)
1240 common
->private_data_ptrs
[max_end
- common
->start
- LINK_SIZE
] = next_end
- max_end
;
1241 common
->private_data_ptrs
[max_end
- common
->start
- LINK_SIZE
+ 1] = (type
== OP_BRAZERO
) ? OP_UPTO
: OP_MINUPTO
;
1242 /* +2 the original and the last. */
1243 common
->private_data_ptrs
[max_end
- common
->start
- LINK_SIZE
+ 2] = max
+ 2;
1247 max_end
-= (1 + LINK_SIZE
) + GET(max_end
, -LINK_SIZE
);
1255 common
->private_data_ptrs
[end
- common
->start
- LINK_SIZE
] = max_end
- end
;
1256 common
->private_data_ptrs
[end
- common
->start
- LINK_SIZE
+ 1] = OP_EXACT
;
1257 common
->private_data_ptrs
[end
- common
->start
- LINK_SIZE
+ 2] = min
;
1264 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1272 case OP_MINQUERYI: \
1273 case OP_NOTMINSTAR: \
1274 case OP_NOTMINPLUS: \
1276 case OP_NOTMINQUERY: \
1277 case OP_NOTMINSTARI: \
1278 case OP_NOTMINPLUSI: \
1279 case OP_NOTQUERYI: \
1280 case OP_NOTMINQUERYI:
1282 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1292 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1298 case OP_NOTMINUPTO: \
1300 case OP_NOTMINUPTOI:
1302 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1303 case OP_TYPEMINSTAR: \
1304 case OP_TYPEMINPLUS: \
1305 case OP_TYPEQUERY: \
1306 case OP_TYPEMINQUERY:
1308 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1312 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1314 case OP_TYPEMINUPTO:
1316 static void set_private_data_ptrs(compiler_common
*common
, int *private_data_start
, pcre_uchar
*ccend
)
1318 pcre_uchar
*cc
= common
->start
;
1319 pcre_uchar
*alternative
;
1320 pcre_uchar
*end
= NULL
;
1321 int private_data_ptr
= *private_data_start
;
1322 int space
, size
, bracketlen
;
1323 BOOL repeat_check
= TRUE
;
1330 if (private_data_ptr
> SLJIT_MAX_LOCAL_SIZE
)
1333 if (repeat_check
&& (*cc
== OP_ONCE
|| *cc
== OP_ONCE_NC
|| *cc
== OP_BRA
|| *cc
== OP_CBRA
|| *cc
== OP_COND
))
1335 if (detect_repeat(common
, cc
))
1337 /* These brackets are converted to repeats, so no global
1338 based single character repeat is allowed. */
1340 end
= bracketend(cc
);
1343 repeat_check
= TRUE
;
1348 if (common
->private_data_ptrs
[cc
+ 1 - common
->start
] != 0)
1350 common
->private_data_ptrs
[cc
- common
->start
] = private_data_ptr
;
1351 private_data_ptr
+= sizeof(sljit_sw
);
1352 cc
+= common
->private_data_ptrs
[cc
+ 1 - common
->start
];
1354 cc
+= 1 + LINK_SIZE
;
1360 case OP_ASSERTBACK_NOT
:
1367 common
->private_data_ptrs
[cc
- common
->start
] = private_data_ptr
;
1368 private_data_ptr
+= sizeof(sljit_sw
);
1369 bracketlen
= 1 + LINK_SIZE
;
1374 common
->private_data_ptrs
[cc
- common
->start
] = private_data_ptr
;
1375 private_data_ptr
+= sizeof(sljit_sw
);
1376 bracketlen
= 1 + LINK_SIZE
+ IMM2_SIZE
;
1380 /* Might be a hidden SCOND. */
1381 alternative
= cc
+ GET(cc
, 1);
1382 if (*alternative
== OP_KETRMAX
|| *alternative
== OP_KETRMIN
)
1384 common
->private_data_ptrs
[cc
- common
->start
] = private_data_ptr
;
1385 private_data_ptr
+= sizeof(sljit_sw
);
1387 bracketlen
= 1 + LINK_SIZE
;
1391 bracketlen
= 1 + LINK_SIZE
;
1396 bracketlen
= 1 + LINK_SIZE
+ IMM2_SIZE
;
1402 repeat_check
= FALSE
;
1406 CASE_ITERATOR_PRIVATE_DATA_1
1411 CASE_ITERATOR_PRIVATE_DATA_2A
1416 CASE_ITERATOR_PRIVATE_DATA_2B
1418 size
= -(2 + IMM2_SIZE
);
1421 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1426 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1427 if (cc
[1] != OP_ANYNL
&& cc
[1] != OP_EXTUNI
)
1433 if (cc
[1 + IMM2_SIZE
] != OP_ANYNL
&& cc
[1 + IMM2_SIZE
] != OP_EXTUNI
)
1435 size
= 1 + IMM2_SIZE
;
1438 case OP_TYPEMINUPTO
:
1440 size
= 1 + IMM2_SIZE
;
1445 space
= get_class_iterator_size(cc
+ size
);
1446 size
= 1 + 32 / sizeof(pcre_uchar
);
1449 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1451 space
= get_class_iterator_size(cc
+ size
);
1457 cc
= next_opcode(common
, cc
);
1458 SLJIT_ASSERT(cc
!= NULL
);
1462 /* Character iterators which are not inside a repeated bracket
1463 get a private slot instead of allocating it on the stack. */
1464 if (space
> 0 && cc
>= end
)
1466 common
->private_data_ptrs
[cc
- common
->start
] = private_data_ptr
;
1467 private_data_ptr
+= sizeof(sljit_sw
) * space
;
1476 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
1487 end
= bracketend(cc
);
1488 if (end
[-1 - LINK_SIZE
] == OP_KET
)
1494 *private_data_start
= private_data_ptr
;
1497 /* Returns a frame_types value (always < 0) if no frame is needed. */
1498 static int get_framesize(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
, BOOL recursive
, BOOL
*needs_control_head
)
1502 BOOL stack_restore
= FALSE
;
1503 BOOL setsom_found
= recursive
;
1504 BOOL setmark_found
= recursive
;
1505 /* The last capture is a local variable even for recursions. */
1506 BOOL capture_last_found
= FALSE
;
1508 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1509 SLJIT_ASSERT(common
->control_head_ptr
!= 0);
1510 *needs_control_head
= TRUE
;
1512 *needs_control_head
= FALSE
;
1517 ccend
= bracketend(cc
) - (1 + LINK_SIZE
);
1518 if (!recursive
&& (*cc
== OP_CBRAPOS
|| *cc
== OP_SCBRAPOS
))
1520 possessive
= length
= (common
->capture_last_ptr
!= 0) ? 5 : 3;
1521 /* This is correct regardless of common->capture_last_ptr. */
1522 capture_last_found
= TRUE
;
1524 cc
= next_opcode(common
, cc
);
1527 SLJIT_ASSERT(cc
!= NULL
);
1532 SLJIT_ASSERT(common
->has_set_som
);
1533 stack_restore
= TRUE
;
1537 setsom_found
= TRUE
;
1545 SLJIT_ASSERT(common
->mark_ptr
!= 0);
1546 stack_restore
= TRUE
;
1550 setmark_found
= TRUE
;
1552 if (common
->control_head_ptr
!= 0)
1553 *needs_control_head
= TRUE
;
1554 cc
+= 1 + 2 + cc
[1];
1558 stack_restore
= TRUE
;
1559 if (common
->has_set_som
&& !setsom_found
)
1562 setsom_found
= TRUE
;
1564 if (common
->mark_ptr
!= 0 && !setmark_found
)
1567 setmark_found
= TRUE
;
1569 if (common
->capture_last_ptr
!= 0 && !capture_last_found
)
1572 capture_last_found
= TRUE
;
1574 cc
+= 1 + LINK_SIZE
;
1581 stack_restore
= TRUE
;
1582 if (common
->capture_last_ptr
!= 0 && !capture_last_found
)
1585 capture_last_found
= TRUE
;
1588 cc
+= 1 + LINK_SIZE
+ IMM2_SIZE
;
1592 stack_restore
= TRUE
;
1593 if (common
->control_head_ptr
!= 0)
1594 *needs_control_head
= TRUE
;
1599 stack_restore
= TRUE
;
1602 case OP_NOT_WORD_BOUNDARY
:
1603 case OP_WORD_BOUNDARY
:
1606 case OP_NOT_WHITESPACE
:
1608 case OP_NOT_WORDCHAR
:
1647 case OP_NOTPOSQUERY
:
1651 case OP_NOTPOSSTARI
:
1652 case OP_NOTPOSPLUSI
:
1653 case OP_NOTPOSQUERYI
:
1654 case OP_NOTPOSUPTOI
:
1657 case OP_TYPEPOSSTAR
:
1658 case OP_TYPEPOSPLUS
:
1659 case OP_TYPEPOSQUERY
:
1660 case OP_TYPEPOSUPTO
:
1667 cc
= next_opcode(common
, cc
);
1668 SLJIT_ASSERT(cc
!= NULL
);
1672 /* Possessive quantifiers can use a special case. */
1673 if (SLJIT_UNLIKELY(possessive
== length
))
1674 return stack_restore
? no_frame
: no_stack
;
1678 return stack_restore
? no_frame
: no_stack
;
1681 static void init_frame(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
, int stackpos
, int stacktop
, BOOL recursive
)
1684 BOOL setsom_found
= recursive
;
1685 BOOL setmark_found
= recursive
;
1686 /* The last capture is a local variable even for recursions. */
1687 BOOL capture_last_found
= FALSE
;
1690 /* >= 1 + shortest item size (2) */
1691 SLJIT_UNUSED_ARG(stacktop
);
1692 SLJIT_ASSERT(stackpos
>= stacktop
+ 2);
1694 stackpos
= STACK(stackpos
);
1697 ccend
= bracketend(cc
) - (1 + LINK_SIZE
);
1698 if (recursive
|| (*cc
!= OP_CBRAPOS
&& *cc
!= OP_SCBRAPOS
))
1699 cc
= next_opcode(common
, cc
);
1702 SLJIT_ASSERT(cc
!= NULL
);
1707 SLJIT_ASSERT(common
->has_set_som
);
1710 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0));
1711 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, -OVECTOR(0));
1712 stackpos
-= (int)sizeof(sljit_sw
);
1713 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP1
, 0);
1714 stackpos
-= (int)sizeof(sljit_sw
);
1715 setsom_found
= TRUE
;
1723 SLJIT_ASSERT(common
->mark_ptr
!= 0);
1726 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
);
1727 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, -common
->mark_ptr
);
1728 stackpos
-= (int)sizeof(sljit_sw
);
1729 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP1
, 0);
1730 stackpos
-= (int)sizeof(sljit_sw
);
1731 setmark_found
= TRUE
;
1733 cc
+= 1 + 2 + cc
[1];
1737 if (common
->has_set_som
&& !setsom_found
)
1739 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0));
1740 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, -OVECTOR(0));
1741 stackpos
-= (int)sizeof(sljit_sw
);
1742 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP1
, 0);
1743 stackpos
-= (int)sizeof(sljit_sw
);
1744 setsom_found
= TRUE
;
1746 if (common
->mark_ptr
!= 0 && !setmark_found
)
1748 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
);
1749 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, -common
->mark_ptr
);
1750 stackpos
-= (int)sizeof(sljit_sw
);
1751 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP1
, 0);
1752 stackpos
-= (int)sizeof(sljit_sw
);
1753 setmark_found
= TRUE
;
1755 if (common
->capture_last_ptr
!= 0 && !capture_last_found
)
1757 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
);
1758 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, -common
->capture_last_ptr
);
1759 stackpos
-= (int)sizeof(sljit_sw
);
1760 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP1
, 0);
1761 stackpos
-= (int)sizeof(sljit_sw
);
1762 capture_last_found
= TRUE
;
1764 cc
+= 1 + LINK_SIZE
;
1771 if (common
->capture_last_ptr
!= 0 && !capture_last_found
)
1773 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
);
1774 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, -common
->capture_last_ptr
);
1775 stackpos
-= (int)sizeof(sljit_sw
);
1776 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP1
, 0);
1777 stackpos
-= (int)sizeof(sljit_sw
);
1778 capture_last_found
= TRUE
;
1780 offset
= (GET2(cc
, 1 + LINK_SIZE
)) << 1;
1781 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, OVECTOR(offset
));
1782 stackpos
-= (int)sizeof(sljit_sw
);
1783 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
));
1784 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
1785 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP1
, 0);
1786 stackpos
-= (int)sizeof(sljit_sw
);
1787 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, TMP2
, 0);
1788 stackpos
-= (int)sizeof(sljit_sw
);
1790 cc
+= 1 + LINK_SIZE
+ IMM2_SIZE
;
1794 cc
= next_opcode(common
, cc
);
1795 SLJIT_ASSERT(cc
!= NULL
);
1799 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackpos
, SLJIT_IMM
, 0);
1800 SLJIT_ASSERT(stackpos
== STACK(stacktop
));
/* get_private_data_copy_length: returns how many machine words of private
data belong to the opcodes between cc and ccend. The count starts at 2, or 3
when needs_control_head is set, and each opcode that owns private data adds
1 or 2 words (or the size reported by get_class_iterator_size).
NOTE(review): the leading numbers are line numbers of the original file; the
gaps show that the loop header, the switch and most case labels are missing
from this extract, so the opcode each fragment belongs to cannot be confirmed
from here. */
1803 static SLJIT_INLINE
int get_private_data_copy_length(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
, BOOL needs_control_head
)
1805 int private_data_length
= needs_control_head
? 3 : 2;
1807 pcre_uchar
*alternative
;
1808 /* Calculate the sum of the private machine words. */
/* One word when the opcode has a private data slot; cc is then advanced by
the value stored for cc + 1. */
1815 if (PRIVATE_DATA(cc
) != 0)
1817 private_data_length
++;
1818 SLJIT_ASSERT(PRIVATE_DATA(cc
+ 1) != 0);
1819 cc
+= PRIVATE_DATA(cc
+ 1);
1821 cc
+= 1 + LINK_SIZE
;
1827 case OP_ASSERTBACK_NOT
:
/* One word; the slot is asserted to exist. */
1834 private_data_length
++;
1835 SLJIT_ASSERT(PRIVATE_DATA(cc
) != 0);
1836 cc
+= 1 + LINK_SIZE
;
/* Counted only when optimized_cbracket is 0 for this bracket number. */
1841 if (common
->optimized_cbracket
[GET2(cc
, 1 + LINK_SIZE
)] == 0)
1842 private_data_length
++;
1843 cc
+= 1 + LINK_SIZE
+ IMM2_SIZE
;
/* Two words are counted unconditionally in this fragment. */
1848 private_data_length
+= 2;
1849 cc
+= 1 + LINK_SIZE
+ IMM2_SIZE
;
1853 /* Might be a hidden SCOND. */
1854 alternative
= cc
+ GET(cc
, 1);
1855 if (*alternative
== OP_KETRMAX
|| *alternative
== OP_KETRMIN
)
1856 private_data_length
++;
1857 cc
+= 1 + LINK_SIZE
;
/* Iterator opcodes: one or two words, counted only when the opcode has
private data. The statements advancing cc between the fragments are partly
missing from this extract. */
1860 CASE_ITERATOR_PRIVATE_DATA_1
1861 if (PRIVATE_DATA(cc
))
1862 private_data_length
++;
1865 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
1869 CASE_ITERATOR_PRIVATE_DATA_2A
1870 if (PRIVATE_DATA(cc
))
1871 private_data_length
+= 2;
1874 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
1878 CASE_ITERATOR_PRIVATE_DATA_2B
1879 if (PRIVATE_DATA(cc
))
1880 private_data_length
+= 2;
1881 cc
+= 2 + IMM2_SIZE
;
1883 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
1887 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1888 if (PRIVATE_DATA(cc
))
1889 private_data_length
++;
1893 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1894 if (PRIVATE_DATA(cc
))
1895 private_data_length
+= 2;
1899 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1900 if (PRIVATE_DATA(cc
))
1901 private_data_length
+= 2;
1902 cc
+= 1 + IMM2_SIZE
;
/* Class items: size is the length of the item, read from the opcode itself
for OP_XCLASS and fixed otherwise. The #else / #endif lines of this
conditional are missing from the extract. */
1907 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1909 size
= (*cc
== OP_XCLASS
) ? GET(cc
, 1) : 1 + 32 / (int)sizeof(pcre_uchar
);
1911 size
= 1 + 32 / (int)sizeof(pcre_uchar
);
1913 if (PRIVATE_DATA(cc
))
1914 private_data_length
+= get_class_iterator_size(cc
+ size
);
/* Presumably the default case: step to the following opcode. */
1919 cc
= next_opcode(common
, cc
);
1920 SLJIT_ASSERT(cc
!= NULL
);
/* The walk must stop exactly at ccend. */
1924 SLJIT_ASSERT(cc
== ccend
);
1925 return private_data_length
;
/* copy_private_data: walks the opcodes between cc and ccend and emits moves
between their private data slots (SLJIT_SP relative, offsets collected in
srcw[]) and the STACK_TOP relative area between stackptr and stacktop. TMP1
and TMP2 carry the values; tmp1empty / tmp2empty track whether a register
still holds a value.
NOTE(review): the leading numbers are line numbers of the original file; the
declarations of srcw, count, size and status, the do/switch skeleton, the
case labels and the tests on save are missing from this extract. Which of the
two move directions below belongs to save == TRUE is therefore an assumption
to confirm against the full file. */
1928 static void copy_private_data(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
,
1929 BOOL save
, int stackptr
, int stacktop
, BOOL needs_control_head
)
1934 BOOL tmp1next
= TRUE
;
1935 BOOL tmp1empty
= TRUE
;
1936 BOOL tmp2empty
= TRUE
;
1937 pcre_uchar
*alternative
;
/* Convert the slot indexes into offsets with STACK(). */
1944 stackptr
= STACK(stackptr
);
1945 stacktop
= STACK(stacktop
- 1);
/* stacktop is lowered by one word, or two when a control head is needed,
then TMP1 and TMP2 are preloaded from the stack area while words remain. */
1949 stacktop
-= (needs_control_head
? 2 : 1) * sizeof(sljit_sw
);
1950 if (stackptr
< stacktop
)
1952 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), stackptr
);
1953 stackptr
+= sizeof(sljit_sw
);
1956 if (stackptr
< stacktop
)
1958 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), stackptr
);
1959 stackptr
+= sizeof(sljit_sw
);
1962 /* The tmp1next must be TRUE in either way. */
1965 SLJIT_ASSERT(common
->recursive_head_ptr
!= 0);
/* First source word(s): the recursive head, or the control head followed by
the recursive head when a control head is needed. */
1976 srcw
[0] = common
->recursive_head_ptr
;
1977 if (needs_control_head
)
1979 SLJIT_ASSERT(common
->control_head_ptr
!= 0);
1981 srcw
[0] = common
->control_head_ptr
;
1982 srcw
[1] = common
->recursive_head_ptr
;
/* Per-opcode fragments: each fills srcw[] with the private data offset(s)
of the current opcode and advances cc. */
1989 if (PRIVATE_DATA(cc
) != 0)
1992 srcw
[0] = PRIVATE_DATA(cc
);
1993 SLJIT_ASSERT(PRIVATE_DATA(cc
+ 1) != 0);
1994 cc
+= PRIVATE_DATA(cc
+ 1);
1996 cc
+= 1 + LINK_SIZE
;
2002 case OP_ASSERTBACK_NOT
:
2010 srcw
[0] = PRIVATE_DATA(cc
);
2011 SLJIT_ASSERT(srcw
[0] != 0);
2012 cc
+= 1 + LINK_SIZE
;
2017 if (common
->optimized_cbracket
[GET2(cc
, 1 + LINK_SIZE
)] == 0)
2020 srcw
[0] = OVECTOR_PRIV(GET2(cc
, 1 + LINK_SIZE
));
2022 cc
+= 1 + LINK_SIZE
+ IMM2_SIZE
;
2028 srcw
[0] = PRIVATE_DATA(cc
);
2029 srcw
[1] = OVECTOR_PRIV(GET2(cc
, 1 + LINK_SIZE
));
2030 SLJIT_ASSERT(srcw
[0] != 0 && srcw
[1] != 0);
2031 cc
+= 1 + LINK_SIZE
+ IMM2_SIZE
;
2035 /* Might be a hidden SCOND. */
2036 alternative
= cc
+ GET(cc
, 1);
2037 if (*alternative
== OP_KETRMAX
|| *alternative
== OP_KETRMIN
)
2040 srcw
[0] = PRIVATE_DATA(cc
);
2041 SLJIT_ASSERT(srcw
[0] != 0);
2043 cc
+= 1 + LINK_SIZE
;
/* Iterator opcodes: two-word variants use the slot and the word after it. */
2046 CASE_ITERATOR_PRIVATE_DATA_1
2047 if (PRIVATE_DATA(cc
))
2050 srcw
[0] = PRIVATE_DATA(cc
);
2054 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
2058 CASE_ITERATOR_PRIVATE_DATA_2A
2059 if (PRIVATE_DATA(cc
))
2062 srcw
[0] = PRIVATE_DATA(cc
);
2063 srcw
[1] = PRIVATE_DATA(cc
) + sizeof(sljit_sw
);
2067 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
2071 CASE_ITERATOR_PRIVATE_DATA_2B
2072 if (PRIVATE_DATA(cc
))
2075 srcw
[0] = PRIVATE_DATA(cc
);
2076 srcw
[1] = PRIVATE_DATA(cc
) + sizeof(sljit_sw
);
2078 cc
+= 2 + IMM2_SIZE
;
2080 if (common
->utf
&& HAS_EXTRALEN(cc
[-1])) cc
+= GET_EXTRALEN(cc
[-1]);
2084 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2085 if (PRIVATE_DATA(cc
))
2088 srcw
[0] = PRIVATE_DATA(cc
);
2093 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2094 if (PRIVATE_DATA(cc
))
2097 srcw
[0] = PRIVATE_DATA(cc
);
2098 srcw
[1] = srcw
[0] + sizeof(sljit_sw
);
2103 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2104 if (PRIVATE_DATA(cc
))
2107 srcw
[0] = PRIVATE_DATA(cc
);
2108 srcw
[1] = srcw
[0] + sizeof(sljit_sw
);
2110 cc
+= 1 + IMM2_SIZE
;
/* Class items: the number of words is chosen by get_class_iterator_size();
any size not handled by the (missing) case labels is unreachable. */
2115 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2117 size
= (*cc
== OP_XCLASS
) ? GET(cc
, 1) : 1 + 32 / (int)sizeof(pcre_uchar
);
2119 size
= 1 + 32 / (int)sizeof(pcre_uchar
);
2121 if (PRIVATE_DATA(cc
))
2122 switch(get_class_iterator_size(cc
+ size
))
2126 srcw
[0] = PRIVATE_DATA(cc
);
2131 srcw
[0] = PRIVATE_DATA(cc
);
2132 srcw
[1] = srcw
[0] + sizeof(sljit_sw
);
2136 SLJIT_UNREACHABLE();
2143 cc
= next_opcode(common
, cc
);
2144 SLJIT_ASSERT(cc
!= NULL
);
/* Direction 1 (presumably save): the value held in TMP1 / TMP2 is written to
the stack area, then the register is reloaded from the private slot
srcw[count]. */
2157 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackptr
, TMP1
, 0);
2158 stackptr
+= sizeof(sljit_sw
);
2160 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), srcw
[count
]);
2168 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackptr
, TMP2
, 0);
2169 stackptr
+= sizeof(sljit_sw
);
2171 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), srcw
[count
]);
/* Direction 2 (presumably restore): the register is written back to the
private slot srcw[count]; it is marked empty once stackptr reaches stacktop,
otherwise the next word is loaded from the stack area. */
2180 SLJIT_ASSERT(!tmp1empty
);
2181 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), srcw
[count
], TMP1
, 0);
2182 tmp1empty
= stackptr
>= stacktop
;
2185 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), stackptr
);
2186 stackptr
+= sizeof(sljit_sw
);
2192 SLJIT_ASSERT(!tmp2empty
);
2193 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), srcw
[count
], TMP2
, 0);
2194 tmp2empty
= stackptr
>= stacktop
;
2197 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), stackptr
);
2198 stackptr
+= sizeof(sljit_sw
);
2205 while (status
!= end
);
/* Trailing stores of TMP1 / TMP2 to the stack area; the conditions guarding
them are not visible in this extract. */
2213 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackptr
, TMP1
, 0);
2214 stackptr
+= sizeof(sljit_sw
);
2218 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackptr
, TMP2
, 0);
2219 stackptr
+= sizeof(sljit_sw
);
2226 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackptr
, TMP2
, 0);
2227 stackptr
+= sizeof(sljit_sw
);
2231 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), stackptr
, TMP1
, 0);
2232 stackptr
+= sizeof(sljit_sw
);
/* Every opcode was consumed and the stack area was filled exactly; when not
saving, both registers must have been emptied. */
2236 SLJIT_ASSERT(cc
== ccend
&& stackptr
== stacktop
&& (save
|| (tmp1empty
&& tmp2empty
)));
2239 static SLJIT_INLINE pcre_uchar
*set_then_offsets(compiler_common
*common
, pcre_uchar
*cc
, sljit_u8
*current_offset
)
2241 pcre_uchar
*end
= bracketend(cc
);
2242 BOOL has_alternatives
= cc
[GET(cc
, 1)] == OP_ALT
;
2244 /* Assert captures then. */
2245 if (*cc
>= OP_ASSERT
&& *cc
<= OP_ASSERTBACK_NOT
)
2246 current_offset
= NULL
;
2247 /* Conditional block does not. */
2248 if (*cc
== OP_COND
|| *cc
== OP_SCOND
)
2249 has_alternatives
= FALSE
;
2251 cc
= next_opcode(common
, cc
);
2252 if (has_alternatives
)
2253 current_offset
= common
->then_offsets
+ (cc
- common
->start
);
2257 if ((*cc
>= OP_ASSERT
&& *cc
<= OP_ASSERTBACK_NOT
) || (*cc
>= OP_ONCE
&& *cc
<= OP_SCOND
))
2258 cc
= set_then_offsets(common
, cc
, current_offset
);
2261 if (*cc
== OP_ALT
&& has_alternatives
)
2262 current_offset
= common
->then_offsets
+ (cc
+ 1 + LINK_SIZE
- common
->start
);
2263 if (*cc
>= OP_THEN
&& *cc
<= OP_THEN_ARG
&& current_offset
!= NULL
)
2264 *current_offset
= 1;
2265 cc
= next_opcode(common
, cc
);
2272 #undef CASE_ITERATOR_PRIVATE_DATA_1
2273 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2274 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2275 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2276 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2277 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2279 static SLJIT_INLINE BOOL
is_powerof2(unsigned int value
)
2281 return (value
& (value
- 1)) == 0;
2284 static SLJIT_INLINE
void set_jumps(jump_list
*list
, struct sljit_label
*label
)
2288 /* sljit_set_label is clever enough to do nothing
2289 if either the jump or the label is NULL. */
2290 SET_LABEL(list
->jump
, label
);
2295 static SLJIT_INLINE
void add_jump(struct sljit_compiler
*compiler
, jump_list
**list
, struct sljit_jump
*jump
)
2297 jump_list
*list_item
= sljit_alloc_memory(compiler
, sizeof(jump_list
));
2300 list_item
->next
= *list
;
2301 list_item
->jump
= jump
;
2306 static void add_stub(compiler_common
*common
, struct sljit_jump
*start
)
2309 stub_list
*list_item
= sljit_alloc_memory(compiler
, sizeof(stub_list
));
2313 list_item
->start
= start
;
2314 list_item
->quit
= LABEL();
2315 list_item
->next
= common
->stubs
;
2316 common
->stubs
= list_item
;
2320 static void flush_stubs(compiler_common
*common
)
2323 stub_list
*list_item
= common
->stubs
;
2327 JUMPHERE(list_item
->start
);
2328 add_jump(compiler
, &common
->stackalloc
, JUMP(SLJIT_FAST_CALL
));
2329 JUMPTO(SLJIT_JUMP
, list_item
->quit
);
2330 list_item
= list_item
->next
;
2332 common
->stubs
= NULL
;
2335 static void add_label_addr(compiler_common
*common
, sljit_uw
*update_addr
)
2338 label_addr_list
*label_addr
;
2340 label_addr
= sljit_alloc_memory(compiler
, sizeof(label_addr_list
));
2341 if (label_addr
== NULL
)
2343 label_addr
->label
= LABEL();
2344 label_addr
->update_addr
= update_addr
;
2345 label_addr
->next
= common
->label_addrs
;
2346 common
->label_addrs
= label_addr
;
2349 static SLJIT_INLINE
void count_match(compiler_common
*common
)
2353 OP2(SLJIT_SUB
| SLJIT_SET_Z
, COUNT_MATCH
, 0, COUNT_MATCH
, 0, SLJIT_IMM
, 1);
2354 add_jump(compiler
, &common
->calllimit
, JUMP(SLJIT_ZERO
));
2357 static SLJIT_INLINE
void allocate_stack(compiler_common
*common
, int size
)
2359 /* May destroy all locals and registers except TMP2. */
2362 SLJIT_ASSERT(size
> 0);
2363 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, size
* sizeof(sljit_sw
));
2364 #ifdef DESTROY_REGISTERS
2365 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, 12345);
2366 OP1(SLJIT_MOV
, TMP3
, 0, TMP1
, 0);
2367 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, TMP1
, 0);
2368 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
, TMP1
, 0);
2369 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS1
, TMP1
, 0);
2371 add_stub(common
, CMP(SLJIT_LESS
, STACK_TOP
, 0, STACK_LIMIT
, 0));
2374 static SLJIT_INLINE
void free_stack(compiler_common
*common
, int size
)
2378 SLJIT_ASSERT(size
> 0);
2379 OP2(SLJIT_ADD
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, size
* sizeof(sljit_sw
));
2382 static sljit_uw
* allocate_read_only_data(compiler_common
*common
, sljit_uw size
)
2387 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
2390 result
= (sljit_uw
*)SLJIT_MALLOC(size
+ sizeof(sljit_uw
), compiler
->allocator_data
);
2391 if (SLJIT_UNLIKELY(result
== NULL
))
2393 sljit_set_compiler_memory_error(compiler
);
2397 *(void**)result
= common
->read_only_data_head
;
2398 common
->read_only_data_head
= (void *)result
;
2402 static void free_read_only_data(void *current
, void *allocator_data
)
2406 SLJIT_UNUSED_ARG(allocator_data
);
2408 while (current
!= NULL
)
2410 next
= *(void**)current
;
2411 SLJIT_FREE(current
, allocator_data
);
/* reset_ovector: emits code that stores "begin - 1" (computed into SLJIT_R0)
into the ovector slots starting at OVECTOR(1). Either one store per slot is
emitted, or a counted loop that runs length - 1 times with SLJIT_R1 as the
pointer and SLJIT_R2 as the counter.
NOTE(review): the leading numbers are line numbers of the original file; the
condition choosing between the unrolled form and the loop, the else lines and
presumably the definitions of the loop label are missing from this extract. */
2416 static SLJIT_INLINE
void reset_ovector(compiler_common
*common
, int length
)
2419 struct sljit_label
*loop
;
2422 /* At this point we can freely use all temporary registers. */
2423 SLJIT_ASSERT(length
> 1);
2424 /* TMP1 returns with begin - 1. */
2425 OP2(SLJIT_SUB
, SLJIT_R0
, 0, SLJIT_MEM1(SLJIT_S0
), SLJIT_OFFSETOF(jit_arguments
, begin
), SLJIT_IMM
, IN_UCHARS(1));
/* Unrolled form: one store for each of OVECTOR(1) .. OVECTOR(length - 1). */
2428 for (i
= 1; i
< length
; i
++)
2429 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(i
), SLJIT_R0
, 0);
/* The SLJIT_MEM_SUPP form of the pre-update store is compared against
SLJIT_SUCCESS before the real store is emitted; the base starts one word
before the first slot written. */
2433 if (sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_SUPP
| SLJIT_MEM_STORE
| SLJIT_MEM_PRE
, SLJIT_R0
, SLJIT_MEM1(SLJIT_R1
), sizeof(sljit_sw
)) == SLJIT_SUCCESS
)
2435 GET_LOCAL_BASE(SLJIT_R1
, 0, OVECTOR_START
);
2436 OP1(SLJIT_MOV
, SLJIT_R2
, 0, SLJIT_IMM
, length
- 1);
2438 sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_STORE
| SLJIT_MEM_PRE
, SLJIT_R0
, SLJIT_MEM1(SLJIT_R1
), sizeof(sljit_sw
));
2439 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_R2
, 0, SLJIT_R2
, 0, SLJIT_IMM
, 1);
2440 JUMPTO(SLJIT_NOT_ZERO
, loop
);
/* Alternative loop: plain store followed by an explicit pointer increment. */
2444 GET_LOCAL_BASE(SLJIT_R1
, 0, OVECTOR_START
+ sizeof(sljit_sw
));
2445 OP1(SLJIT_MOV
, SLJIT_R2
, 0, SLJIT_IMM
, length
- 1);
2447 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_R1
), 0, SLJIT_R0
, 0);
2448 OP2(SLJIT_ADD
, SLJIT_R1
, 0, SLJIT_R1
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
2449 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_R2
, 0, SLJIT_R2
, 0, SLJIT_IMM
, 1);
2450 JUMPTO(SLJIT_NOT_ZERO
, loop
);
2455 static SLJIT_INLINE
void reset_fast_fail(compiler_common
*common
)
2460 SLJIT_ASSERT(common
->fast_fail_start_ptr
< common
->fast_fail_end_ptr
);
2462 OP2(SLJIT_SUB
, TMP1
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
2463 for (i
= common
->fast_fail_start_ptr
; i
< common
->fast_fail_end_ptr
; i
+= sizeof(sljit_sw
))
2464 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), i
, TMP1
, 0);
/* do_reset_match: emits code that copies the value held in OVECTOR(1) into
the ovector slots starting at OVECTOR(2), either with one store per slot or
with a counted loop of length - 2 iterations that uses TMP2 as the pointer
and STACK_TOP as the counter. Afterwards the mark and control head slots are
cleared when they exist, TMP1 is loaded from the start_ptr slot, and
STACK_TOP is reloaded from the end field of the stack referenced by the
jit_arguments structure.
NOTE(review): the leading numbers are line numbers of the original file; the
conditions selecting the variants, the else lines and presumably the
definitions of the loop label are missing from this extract. */
2467 static SLJIT_INLINE
void do_reset_match(compiler_common
*common
, int length
)
2470 struct sljit_label
*loop
;
2473 SLJIT_ASSERT(length
> 1);
2474 /* OVECTOR(1) contains the "string begin - 1" constant. */
2476 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1));
/* Unrolled form: one store for each of OVECTOR(2) .. OVECTOR(length - 1). */
2479 for (i
= 2; i
< length
; i
++)
2480 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(i
), TMP1
, 0);
/* Loop with a pre-update store, used when the SLJIT_MEM_SUPP query returns
SLJIT_SUCCESS; the base starts one word before OVECTOR(2). */
2484 if (sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_SUPP
| SLJIT_MEM_STORE
| SLJIT_MEM_PRE
, TMP1
, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
)) == SLJIT_SUCCESS
)
2486 GET_LOCAL_BASE(TMP2
, 0, OVECTOR_START
+ sizeof(sljit_sw
));
2487 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_IMM
, length
- 2);
2489 sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_STORE
| SLJIT_MEM_PRE
, TMP1
, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
));
2490 OP2(SLJIT_SUB
| SLJIT_SET_Z
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, 1);
2491 JUMPTO(SLJIT_NOT_ZERO
, loop
);
/* Alternative loop: plain store followed by an explicit pointer increment. */
2495 GET_LOCAL_BASE(TMP2
, 0, OVECTOR_START
+ 2 * sizeof(sljit_sw
));
2496 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_IMM
, length
- 2);
2498 OP1(SLJIT_MOV
, SLJIT_MEM1(TMP2
), 0, TMP1
, 0);
2499 OP2(SLJIT_ADD
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
2500 OP2(SLJIT_SUB
| SLJIT_SET_Z
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, 1);
2501 JUMPTO(SLJIT_NOT_ZERO
, loop
);
/* STACK_TOP was used as a scratch register above; it is rebuilt here by
following ARGUMENTS -> stack -> end. */
2505 OP1(SLJIT_MOV
, STACK_TOP
, 0, ARGUMENTS
, 0);
2506 if (common
->mark_ptr
!= 0)
2507 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
, SLJIT_IMM
, 0);
2508 if (common
->control_head_ptr
!= 0)
2509 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_IMM
, 0);
2510 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(STACK_TOP
), SLJIT_OFFSETOF(jit_arguments
, stack
));
2511 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->start_ptr
);
2512 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(STACK_TOP
), SLJIT_OFFSETOF(struct sljit_stack
, end
));
/* do_search_mark: walks a chain of records linked through current[0] and
compares skip_arg with the string whose address is stored in current[2].
NOTE(review): the leading numbers are line numbers of the original file; the
switch expression, the case label of the string comparison and every return
statement are missing from this extract, so the returned values cannot be
documented from here. */
2515 static sljit_sw SLJIT_FUNC
do_search_mark(sljit_sw
*current
, const pcre_uchar
*skip_arg
)
2517 while (current
!= NULL
)
2521 case type_then_trap
:
2525 if (STRCMP_UC_UC(skip_arg
, (pcre_uchar
*)current
[2]) == 0)
2530 SLJIT_UNREACHABLE();
/* The link is either 0 (end of chain) or points to a higher address. */
2533 SLJIT_ASSERT(current
[0] == 0 || current
< (sljit_sw
*)current
[0]);
2534 current
= (sljit_sw
*)current
[0];
/* copy_ovector: emits the code that publishes a successful match. The old
content of OVECTOR(1) is kept in SLJIT_S2 and OVECTOR(1) is replaced by
STR_PTR. The mark value, when used, is stored into the mark_ptr field of the
arguments. Then offset_count values are read from the ovector, converted to
offsets relative to begin (shifted by UCHAR_SHIFT in the 16 and 32 bit
builds) and stored as 32 bit integers into the offsets array. Finally the
return value is computed by scanning the ovector downwards while the value
read equals SLJIT_S2.
NOTE(review): the leading numbers are line numbers of the original file; the
declarations, loop labels and the if/else lines selecting the variants are
missing from this extract. */
2539 static SLJIT_INLINE
void copy_ovector(compiler_common
*common
, int topbracket
)
2542 struct sljit_label
*loop
;
2543 struct sljit_jump
*early_quit
;
2546 /* At this point we can freely use all registers. */
2547 OP1(SLJIT_MOV
, SLJIT_S2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1));
2548 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1), STR_PTR
, 0);
2550 OP1(SLJIT_MOV
, SLJIT_R0
, 0, ARGUMENTS
, 0);
2551 if (common
->mark_ptr
!= 0)
2552 OP1(SLJIT_MOV
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
);
2553 OP1(SLJIT_MOV_S32
, SLJIT_R1
, 0, SLJIT_MEM1(SLJIT_R0
), SLJIT_OFFSETOF(jit_arguments
, offset_count
));
2554 if (common
->mark_ptr
!= 0)
2555 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_R0
), SLJIT_OFFSETOF(jit_arguments
, mark_ptr
), SLJIT_R2
, 0);
/* SLJIT_R2 starts one int before the offsets array because it is advanced
before each store below. */
2556 OP2(SLJIT_SUB
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_R0
), SLJIT_OFFSETOF(jit_arguments
, offsets
), SLJIT_IMM
, sizeof(int));
2557 OP1(SLJIT_MOV
, SLJIT_R0
, 0, SLJIT_MEM1(SLJIT_R0
), SLJIT_OFFSETOF(jit_arguments
, begin
));
/* has_pre records whether the SLJIT_MEM_SUPP query for a pre-update load
returned SLJIT_SUCCESS; the source pointer then starts one word earlier. */
2559 has_pre
= sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_SUPP
| SLJIT_MEM_PRE
, SLJIT_S1
, SLJIT_MEM1(SLJIT_S0
), sizeof(sljit_sw
)) == SLJIT_SUCCESS
;
2560 GET_LOCAL_BASE(SLJIT_S0
, 0, OVECTOR_START
- (has_pre
? sizeof(sljit_sw
) : 0));
2562 /* Unlikely, but possible */
2563 early_quit
= CMP(SLJIT_EQUAL
, SLJIT_R1
, 0, SLJIT_IMM
, 0);
2567 sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_PRE
, SLJIT_S1
, SLJIT_MEM1(SLJIT_S0
), sizeof(sljit_sw
));
2570 OP1(SLJIT_MOV
, SLJIT_S1
, 0, SLJIT_MEM1(SLJIT_S0
), 0);
2571 OP2(SLJIT_ADD
, SLJIT_S0
, 0, SLJIT_S0
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
2574 OP2(SLJIT_ADD
, SLJIT_R2
, 0, SLJIT_R2
, 0, SLJIT_IMM
, sizeof(int));
2575 OP2(SLJIT_SUB
, SLJIT_S1
, 0, SLJIT_S1
, 0, SLJIT_R0
, 0);
2576 /* Copy the integer value to the output buffer */
2577 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2578 OP2(SLJIT_ASHR
, SLJIT_S1
, 0, SLJIT_S1
, 0, SLJIT_IMM
, UCHAR_SHIFT
);
2581 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(SLJIT_R2
), 0, SLJIT_S1
, 0);
2582 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_R1
, 0, SLJIT_R1
, 0, SLJIT_IMM
, 1);
2583 JUMPTO(SLJIT_NOT_ZERO
, loop
);
2584 JUMPHERE(early_quit
);
2586 /* Calculate the return value, which is the maximum ovector value. */
/* Two variants of the downward scan follow: one using a pre-update load with
a step of minus two words, one using a plain load and an explicit
subtraction. SLJIT_R1 starts at topbracket + 1 and is decremented for every
slot examined. */
2589 if (sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_SUPP
| SLJIT_MEM_PRE
, SLJIT_R2
, SLJIT_MEM1(SLJIT_R0
), -(2 * (sljit_sw
)sizeof(sljit_sw
))) == SLJIT_SUCCESS
)
2591 GET_LOCAL_BASE(SLJIT_R0
, 0, OVECTOR_START
+ topbracket
* 2 * sizeof(sljit_sw
));
2592 OP1(SLJIT_MOV
, SLJIT_R1
, 0, SLJIT_IMM
, topbracket
+ 1);
2594 /* OVECTOR(0) is never equal to SLJIT_S2. */
2596 sljit_emit_mem(compiler
, SLJIT_MOV
| SLJIT_MEM_PRE
, SLJIT_R2
, SLJIT_MEM1(SLJIT_R0
), -(2 * (sljit_sw
)sizeof(sljit_sw
)));
2597 OP2(SLJIT_SUB
, SLJIT_R1
, 0, SLJIT_R1
, 0, SLJIT_IMM
, 1);
2598 CMPTO(SLJIT_EQUAL
, SLJIT_R2
, 0, SLJIT_S2
, 0, loop
);
2602 GET_LOCAL_BASE(SLJIT_R0
, 0, OVECTOR_START
+ (topbracket
- 1) * 2 * sizeof(sljit_sw
));
2603 OP1(SLJIT_MOV
, SLJIT_R1
, 0, SLJIT_IMM
, topbracket
+ 1);
2605 /* OVECTOR(0) is never equal to SLJIT_S2. */
2607 OP1(SLJIT_MOV
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_R0
), 0);
2608 OP2(SLJIT_SUB
, SLJIT_R0
, 0, SLJIT_R0
, 0, SLJIT_IMM
, 2 * (sljit_sw
)sizeof(sljit_sw
));
2609 OP2(SLJIT_SUB
, SLJIT_R1
, 0, SLJIT_R1
, 0, SLJIT_IMM
, 1);
2610 CMPTO(SLJIT_EQUAL
, SLJIT_R2
, 0, SLJIT_S2
, 0, loop
);
2612 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_R1
, 0);
/* Constant result of 1; the condition selecting this path is not visible in
this extract. */
2615 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, 1);
/* return_with_partial_match: emits code that sets the return register to
PCRE_ERROR_PARTIAL and jumps to quit. When real_offset_count is at least 2,
positions relative to begin (shifted by UCHAR_SHIFT in the 16 and 32 bit
builds) are stored as 32 bit integers: offsets[0] from the start_used_ptr or
hit_start slot, offsets[1] from STR_END, and offsets[2] from the start_ptr
slot or the word after hit_start. The choice of slot depends on whether
common->mode is JIT_PARTIAL_HARD_COMPILE.
NOTE(review): the leading numbers are line numbers of the original file; the
#endif lines and the landing point of jump are missing from this extract. */
2618 static SLJIT_INLINE
void return_with_partial_match(compiler_common
*common
, struct sljit_label
*quit
)
2621 struct sljit_jump
*jump
;
2623 SLJIT_COMPILE_ASSERT(STR_END
== SLJIT_S1
, str_end_must_be_saved_reg2
);
2624 SLJIT_ASSERT(common
->start_used_ptr
!= 0 && common
->start_ptr
!= 0
2625 && (common
->mode
== JIT_PARTIAL_SOFT_COMPILE
? common
->hit_start
!= 0 : common
->hit_start
== 0));
2627 OP1(SLJIT_MOV
, SLJIT_R1
, 0, ARGUMENTS
, 0);
2628 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, PCRE_ERROR_PARTIAL
);
/* With fewer than two output slots nothing is stored. */
2629 OP1(SLJIT_MOV_S32
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_R1
), SLJIT_OFFSETOF(jit_arguments
, real_offset_count
));
2630 CMPTO(SLJIT_SIG_LESS
, SLJIT_R2
, 0, SLJIT_IMM
, 2, quit
);
2632 /* Store match begin and end. */
2633 OP1(SLJIT_MOV
, SLJIT_S0
, 0, SLJIT_MEM1(SLJIT_R1
), SLJIT_OFFSETOF(jit_arguments
, begin
));
2634 OP1(SLJIT_MOV
, SLJIT_R1
, 0, SLJIT_MEM1(SLJIT_R1
), SLJIT_OFFSETOF(jit_arguments
, offsets
));
/* jump is taken with fewer than three slots; presumably it skips the store
to offsets[2] below. */
2636 jump
= CMP(SLJIT_SIG_LESS
, SLJIT_R2
, 0, SLJIT_IMM
, 3);
2637 OP2(SLJIT_SUB
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->mode
== JIT_PARTIAL_HARD_COMPILE
? common
->start_ptr
: (common
->hit_start
+ (int)sizeof(sljit_sw
)), SLJIT_S0
, 0);
2638 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2639 OP2(SLJIT_ASHR
, SLJIT_R2
, 0, SLJIT_R2
, 0, SLJIT_IMM
, UCHAR_SHIFT
);
2641 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(SLJIT_R1
), 2 * sizeof(int), SLJIT_R2
, 0);
2644 OP1(SLJIT_MOV
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->mode
== JIT_PARTIAL_HARD_COMPILE
? common
->start_used_ptr
: common
->hit_start
);
/* offsets[1]: end of the subject relative to begin. */
2645 OP2(SLJIT_SUB
, SLJIT_S1
, 0, STR_END
, 0, SLJIT_S0
, 0);
2646 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2647 OP2(SLJIT_ASHR
, SLJIT_S1
, 0, SLJIT_S1
, 0, SLJIT_IMM
, UCHAR_SHIFT
);
2649 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(SLJIT_R1
), sizeof(int), SLJIT_S1
, 0);
/* offsets[0]: the position loaded into SLJIT_R2 above, relative to begin. */
2651 OP2(SLJIT_SUB
, SLJIT_R2
, 0, SLJIT_R2
, 0, SLJIT_S0
, 0);
2652 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2653 OP2(SLJIT_ASHR
, SLJIT_R2
, 0, SLJIT_R2
, 0, SLJIT_IMM
, UCHAR_SHIFT
);
2655 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(SLJIT_R1
), 0, SLJIT_R2
, 0);
2657 JUMPTO(SLJIT_JUMP
, quit
);
2660 static SLJIT_INLINE
void check_start_used_ptr(compiler_common
*common
)
2662 /* May destroy TMP1. */
2664 struct sljit_jump
*jump
;
2666 if (common
->mode
== JIT_PARTIAL_SOFT_COMPILE
)
2668 /* The value of -1 must be kept for start_used_ptr! */
2669 OP2(SLJIT_ADD
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, SLJIT_IMM
, 1);
2670 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2671 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2672 jump
= CMP(SLJIT_LESS_EQUAL
, TMP1
, 0, STR_PTR
, 0);
2673 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0);
2676 else if (common
->mode
== JIT_PARTIAL_HARD_COMPILE
)
2678 jump
= CMP(SLJIT_LESS_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0);
2679 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0);
/* char_has_othercase: tells whether the character at cc differs from its
other case. Three results are visible: a comparison against
UCD_OTHERCASE(c), a direct lookup in common->fcc, and a lookup in
common->fcc limited by MAX_255(c) that yields FALSE for larger values.
NOTE(review): the leading numbers are line numbers of the original file; the
declaration and loading of c and most preprocessor lines selecting between
the returns are missing from this extract. */
2684 static SLJIT_INLINE BOOL
char_has_othercase(compiler_common
*common
, pcre_uchar
*cc
)
2686 /* Detects if the character has an othercase. */
2696 return c
!= UCD_OTHERCASE(c
);
2701 #ifndef COMPILE_PCRE8
2702 return common
->fcc
[c
] != c
;
2708 return MAX_255(c
) ? common
->fcc
[c
] != c
: FALSE
;
/* char_othercase: returns the other case of c. In UTF mode, characters above
127 are mapped with UCD_OTHERCASE; the remaining path uses TABLE_GET on
common->fcc with c itself as the fallback argument.
NOTE(review): the leading numbers are line numbers of the original file; the
preprocessor lines around the UTF branch, and any alternative return inside
it, are missing from this extract. */
2711 static SLJIT_INLINE
unsigned int char_othercase(compiler_common
*common
, unsigned int c
)
2713 /* Returns with the othercase. */
2715 if (common
->utf
&& c
> 127)
2718 return UCD_OTHERCASE(c
);
2724 return TABLE_GET(c
, common
->fcc
, c
);
/* char_get_othercase_bit: for the character at cc, works out whether it and
its other case (oc) differ in a single bit. The visible return values pack
two fields: the low 8 bits hold a bit mask and the bits above hold an index
(0 .. 3, or n in the UTF-8 path); presumably the index selects the code unit
the mask applies to.
NOTE(review): the leading numbers are line numbers of the original file; the
loading of c, the computation of bit, the result returned when bit is not a
power of two, and several preprocessor and shift lines are missing from this
extract. */
2727 static unsigned int char_get_othercase_bit(compiler_common
*common
, pcre_uchar
*cc
)
2729 /* Detects if the character and its othercase has only 1 bit difference. */
2730 unsigned int c
, oc
, bit
;
2731 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2740 oc
= common
->fcc
[c
];
2744 oc
= UCD_OTHERCASE(c
);
2753 oc
= TABLE_GET(c
, common
->fcc
, c
);
2757 oc
= TABLE_GET(c
, common
->fcc
, c
);
2760 SLJIT_ASSERT(c
!= oc
);
2763 /* Optimized for English alphabet. */
2764 if (c
<= 127 && bit
== 0x20)
2765 return (0 << 8) | 0x20;
2767 /* Since c != oc, they must have at least 1 bit difference. */
2768 if (!is_powerof2(bit
))
/* 8 bit build: in UTF mode above 127, n starts from GET_EXTRALEN of the
first code unit and bit is examined six bits at a time. */
2771 #if defined COMPILE_PCRE8
2774 if (common
->utf
&& c
> 127)
2776 n
= GET_EXTRALEN(*cc
);
2777 while ((bit
& 0x3f) == 0)
2782 return (n
<< 8) | bit
;
2784 #endif /* SUPPORT_UTF */
2785 return (0 << 8) | bit
;
/* 16 and 32 bit builds: masks of 256 and above are reported as the upper
byte (bit >> 8) with the index raised by one. */
2787 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2790 if (common
->utf
&& c
> 65535)
2792 if (bit
>= (1 << 10))
2795 return (bit
< 256) ? ((2 << 8) | bit
) : ((3 << 8) | (bit
>> 8));
2797 #endif /* SUPPORT_UTF */
2798 return (bit
< 256) ? ((0 << 8) | bit
) : ((1 << 8) | (bit
>> 8));
2800 #endif /* COMPILE_PCRE[8|16|32] */
2803 static void check_partial(compiler_common
*common
, BOOL force
)
2805 /* Checks whether a partial matching is occurred. Does not modify registers. */
2807 struct sljit_jump
*jump
= NULL
;
2809 SLJIT_ASSERT(!force
|| common
->mode
!= JIT_COMPILE
);
2811 if (common
->mode
== JIT_COMPILE
)
2815 jump
= CMP(SLJIT_GREATER_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0);
2816 else if (common
->mode
== JIT_PARTIAL_SOFT_COMPILE
)
2817 jump
= CMP(SLJIT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, SLJIT_IMM
, -1);
2819 if (common
->mode
== JIT_PARTIAL_SOFT_COMPILE
)
2820 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, SLJIT_IMM
, 0);
2823 if (common
->partialmatchlabel
!= NULL
)
2824 JUMPTO(SLJIT_JUMP
, common
->partialmatchlabel
);
2826 add_jump(compiler
, &common
->partialmatch
, JUMP(SLJIT_JUMP
));
2833 static void check_str_end(compiler_common
*common
, jump_list
**end_reached
)
2835 /* Does not affect registers. Usually used in a tight spot. */
2837 struct sljit_jump
*jump
;
2839 if (common
->mode
== JIT_COMPILE
)
2841 add_jump(compiler
, end_reached
, CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0));
2845 jump
= CMP(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0);
2846 if (common
->mode
== JIT_PARTIAL_SOFT_COMPILE
)
2848 add_jump(compiler
, end_reached
, CMP(SLJIT_GREATER_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0));
2849 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, SLJIT_IMM
, 0);
2850 add_jump(compiler
, end_reached
, JUMP(SLJIT_JUMP
));
2854 add_jump(compiler
, end_reached
, CMP(SLJIT_GREATER_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0));
2855 if (common
->partialmatchlabel
!= NULL
)
2856 JUMPTO(SLJIT_JUMP
, common
->partialmatchlabel
);
2858 add_jump(compiler
, &common
->partialmatch
, JUMP(SLJIT_JUMP
));
2863 static void detect_partial_match(compiler_common
*common
, jump_list
**backtracks
)
2866 struct sljit_jump
*jump
;
2868 if (common
->mode
== JIT_COMPILE
)
2870 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0));
2874 /* Partial matching mode. */
2875 jump
= CMP(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0);
2876 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0));
2877 if (common
->mode
== JIT_PARTIAL_SOFT_COMPILE
)
2879 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, SLJIT_IMM
, 0);
2880 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
2884 if (common
->partialmatchlabel
!= NULL
)
2885 JUMPTO(SLJIT_JUMP
, common
->partialmatchlabel
);
2887 add_jump(compiler
, &common
->partialmatch
, JUMP(SLJIT_JUMP
));
/* peek_char: emits code that loads the character at STR_PTR into TMP1
without consuming it. max is the largest character value the caller is
interested in: the 8 bit UTF path returns right after the single code unit
load when max is below 128, the 16 bit UTF path when max is below 0xd800.
NOTE(review): the leading numbers are line numbers of the original file; the
tests on common->utf, the landing points of jump and several #endif lines are
missing from this extract. */
2892 static void peek_char(compiler_common
*common
, sljit_u32 max
)
2894 /* Reads the character into TMP1, keeps STR_PTR.
2895 Does not check STR_END. TMP2 Destroyed. */
2897 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2898 struct sljit_jump
*jump
;
2901 SLJIT_UNUSED_ARG(max
);
2903 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
2904 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2907 if (max
< 128) return;
/* Code units of 0xc0 and above go through the shared utfreadchar routine;
STR_PTR is stepped forward for the call and then moved back by TMP2. */
2909 jump
= CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xc0);
2910 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
2911 add_jump(compiler
, &common
->utfreadchar
, JUMP(SLJIT_FAST_CALL
));
2912 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
2915 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2917 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2920 if (max
< 0xd800) return;
/* Values outside 0xd800 .. 0xdbff take the jump; otherwise the value is
combined with the low 10 bits of a second load from STR_PTR. */
2922 OP2(SLJIT_SUB
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0xd800);
2923 jump
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 0xdc00 - 0xd800 - 1);
2924 /* TMP2 contains the high surrogate. */
2925 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
2926 OP2(SLJIT_ADD
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x40);
2927 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 10);
2928 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3ff);
2929 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
2935 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* is_char7_bitset: examines a 32 byte class bitmap. Bytes are compared with
0xff when nclass is set and with 0 otherwise, up to bitset + 32.
NOTE(review): the leading numbers are line numbers of the original file; the
statement positioning bitset before the loop, the do keyword and both return
statements are missing from this extract, so the first byte examined and the
returned values cannot be confirmed from here. */
2937 static BOOL
is_char7_bitset(const sljit_u8
*bitset
, BOOL nclass
)
2939 /* Tells whether the character codes below 128 are enough
2940 to determine a match. */
2941 const sljit_u8 value
= nclass
? 0xff : 0;
2942 const sljit_u8
*end
= bitset
+ 32;
2947 if (*bitset
++ != value
)
2950 while (bitset
< end
);
/* read_char7_type: emits code that reads one code unit into TMP2, advances
STR_PTR by one character and loads the matching entry of common->ctypes into
TMP1. For code units of 0xc0 and above, STR_PTR is advanced further by the
value found in utf8_table4. Only valid in UTF mode.
NOTE(review): the leading numbers are line numbers of the original file; the
use of full_read, the landing point of jump and the code giving the zero
result mentioned in the comment are missing from this extract. */
2954 static void read_char7_type(compiler_common
*common
, BOOL full_read
)
2956 /* Reads the precise character type of a character into TMP1, if the character
2957 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2958 full_read argument tells whether characters above max are accepted or not. */
2960 struct sljit_jump
*jump
;
2962 SLJIT_ASSERT(common
->utf
);
2964 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), 0);
2965 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
2967 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP2
), common
->ctypes
);
/* The table address is biased by 0xc0 so that TMP2 can index it directly. */
2971 jump
= CMP(SLJIT_LESS
, TMP2
, 0, SLJIT_IMM
, 0xc0);
2972 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
2973 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
2978 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
/* read_char_range: emits code that reads the character at STR_PTR into TMP1
and advances STR_PTR. The decoding is only exact for characters between min
and max, which lets shorter code be emitted when the caller cares about a
limited range. The first code unit is always loaded and STR_PTR stepped by
one; the UTF-8 and UTF-16 sections then handle multi unit characters.
RETURN_ADDR and TMP2 are used as scratch registers in the UTF-8 section.
NOTE(review): the leading numbers are line numbers of the original file; the
tests on common->utf, the first range condition, every else / brace line, the
landing points of jump and jump2 and several #endif lines are missing from
this extract, so the exact nesting of the fragments below cannot be confirmed
from here. */
2980 static void read_char_range(compiler_common
*common
, sljit_u32 min
, sljit_u32 max
, BOOL update_str_ptr
)
2982 /* Reads the precise value of a character into TMP1, if the character is
2983 between min and max (c >= min && c <= max). Otherwise it returns with a value
2984 outside the range. Does not check STR_END. */
2986 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2987 struct sljit_jump
*jump
;
2989 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2990 struct sljit_jump
*jump2
;
2993 SLJIT_UNUSED_ARG(update_str_ptr
);
2994 SLJIT_UNUSED_ARG(min
);
2995 SLJIT_UNUSED_ARG(max
);
2996 SLJIT_ASSERT(min
<= max
);
2998 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
2999 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3001 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3004 if (max
< 128 && !update_str_ptr
) return;
/* First code units below 0xc0 need no further decoding. */
3006 jump
= CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xc0);
/* Inline decoding for first units 0xf0 .. 0xf7: three more units are merged,
six bits each. RETURN_ADDR receives the utf8_table4 entry, which is added to
STR_PTR in the fragment at 3029. */
3009 OP2(SLJIT_SUB
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0xf0);
3011 OP1(SLJIT_MOV_U8
, RETURN_ADDR
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
3012 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3013 jump2
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 0x7);
3014 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 6);
3015 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3f);
3016 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3017 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
3018 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3019 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3020 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3021 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(2));
3022 if (!update_str_ptr
)
3023 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(3));
3024 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3025 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3026 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3029 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, RETURN_ADDR
, 0);
/* Range limited to 0x800 .. 0xffff: inline decoding for first units
0xe0 .. 0xef, merging two more units. */
3031 else if (min
>= 0x800 && max
<= 0xffff)
3033 OP2(SLJIT_SUB
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0xe0);
3035 OP1(SLJIT_MOV_U8
, RETURN_ADDR
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
3036 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3037 jump2
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 0xf);
3038 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 6);
3039 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3f);
3040 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3041 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
3042 if (!update_str_ptr
)
3043 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(2));
3044 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3045 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3046 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3049 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, RETURN_ADDR
, 0);
/* Other ranges reaching 0x800 or above use a shared routine: utfreadchar16
when max is below 0x10000, utfreadchar otherwise. */
3051 else if (max
>= 0x800)
3052 add_jump(compiler
, (max
< 0x10000) ? &common
->utfreadchar16
: &common
->utfreadchar
, JUMP(SLJIT_FAST_CALL
));
/* Skip only: STR_PTR is advanced by the utf8_table4 entry and no value is
assembled. The condition selecting this fragment is not visible here. */
3055 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
3056 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
/* Two unit decoding: the first unit supplies the upper bits, the second the
low six bits. */
3060 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3061 if (!update_str_ptr
)
3062 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3064 OP1(SLJIT_MOV_U8
, RETURN_ADDR
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
3065 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3f);
3066 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3067 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3068 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3070 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, RETURN_ADDR
, 0);
/* UTF-16: a first unit in 0xd800 .. 0xdbff is combined with the low 10 bits
of the following unit, and STR_PTR is advanced past that unit. */
3076 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3081 OP2(SLJIT_SUB
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0xd800);
3082 jump
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 0xdc00 - 0xd800 - 1);
3083 /* TMP2 contains the high surrogate. */
3084 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3085 OP2(SLJIT_ADD
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x40);
3086 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 10);
3087 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3088 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3ff);
3089 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3094 if (max
< 0xd800 && !update_str_ptr
) return;
3096 /* Skip low surrogate if necessary. */
3097 OP2(SLJIT_SUB
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0xd800);
3098 jump
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 0xdc00 - 0xd800 - 1);
3100 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* TMP1 is set to 0x10000 here; the guarding condition is not visible in this
extract. */
3102 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, 0x10000);
/* Reads one character without restricting its range: forwards to
read_char_range() with 0 and READ_CHAR_MAX as the range arguments and TRUE as
the last argument (presumably update_str_ptr -- confirm against
read_char_range). */
3108 static SLJIT_INLINE
void read_char(compiler_common
*common
)
3110 read_char_range(common
, 0, READ_CHAR_MAX
, TRUE
);
/* Emits code that loads the code unit at STR_PTR and looks up its entry in
common->ctypes, leaving the result in TMP1. Code points above 255 get type 0
because the table has only 256 entries. update_str_ptr is consulted in the UTF
branches below to decide whether STR_PTR must also be moved past the remaining
units of a multi-unit character. */
3113 static void read_char8_type(compiler_common
*common
, BOOL update_str_ptr
)
3115 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3117 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3118 struct sljit_jump
*jump
;
3120 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3121 struct sljit_jump
*jump2
;
/* The argument is not referenced in every build configuration. */
3124 SLJIT_UNUSED_ARG(update_str_ptr
);
/* TMP2 = code unit at STR_PTR, then step STR_PTR forward by one unit. */
3126 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), 0);
3127 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3129 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3132 /* This can be an extra read in some situations, but hopefully
3133 it is needed in most cases. */
3134 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP2
), common
->ctypes
);
/* Bytes below 0xc0 are not UTF-8 lead bytes; they branch away from the
multi-byte handling that follows. */
3135 jump
= CMP(SLJIT_LESS
, TMP2
, 0, SLJIT_IMM
, 0xc0);
3136 if (!update_str_ptr
)
/* Inline two-byte decode: TMP2 = ((lead & 0x3f) << 6) | (next & 0x3f). */
3138 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3139 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3140 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3141 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 6);
3142 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3f);
3143 OP2(SLJIT_OR
, TMP2
, 0, TMP2
, 0, TMP1
, 0);
/* Default the type to 0; the table is only read when the value fits in it. */
3144 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, 0);
3145 jump2
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 255);
3146 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP2
), common
->ctypes
);
/* Fast call into the shared routine collected in common->utfreadtype8. */
3150 add_jump(compiler
, &common
->utfreadtype8
, JUMP(SLJIT_FAST_CALL
));
3154 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3156 #if !defined COMPILE_PCRE8
3157 /* The ctypes array contains only 256 values. */
3158 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, 0);
3159 jump
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 255);
3161 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP2
), common
->ctypes
);
3162 #if !defined COMPILE_PCRE8
3166 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3167 if (common
->utf
&& update_str_ptr
)
3169 /* Skip low surrogate if necessary. */
/* TMP2 - 0xd800 is at most 0x3ff exactly when TMP2 lies in 0xd800..0xdbff;
only then is STR_PTR advanced by one more unit. */
3170 OP2(SLJIT_SUB
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0xd800);
3171 jump
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 0xdc00 - 0xd800 - 1);
3172 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3175 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
/* Emits code that moves STR_PTR back by one character. In the 8 and 16 bit
UTF builds the step accounts for multi-unit characters; the final statement
steps back by exactly one code unit. */
3178 static void skip_char_back(compiler_common
*common
)
3180 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3182 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3183 #if defined COMPILE_PCRE8
3184 struct sljit_label
*label
;
/* Load the byte before STR_PTR, step back over it, and branch to label while
that byte has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
3189 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -IN_UCHARS(1));
3190 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3191 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xc0);
3192 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0x80, label
);
3195 #elif defined COMPILE_PCRE16
3198 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -IN_UCHARS(1));
3199 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3200 /* Skip low surrogate if necessary. */
/* (unit & 0xfc00) == 0xdc00 identifies 0xdc00..0xdfff. The result of that
equality test is stored in TMP1, shifted left by one (two bytes per 16 bit
unit) and subtracted from STR_PTR, so the step is branch-free. */
3201 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xfc00);
3202 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0xdc00);
3203 OP_FLAGS(SLJIT_MOV
, TMP1
, 0, SLJIT_EQUAL
);
3204 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
3205 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
3208 #endif /* COMPILE_PCRE[8|16] */
3209 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Plain case: one character is one code unit. */
3210 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* Emits a newline test for the character in TMP1 and appends the resulting
jump(s) to the list at *backtracks. nltype selects the test (NLTYPE_ANY,
NLTYPE_ANYCRLF, otherwise asserted to be NLTYPE_FIXED with a one-unit newline).
jumpifmatch chooses the polarity: jump when the character is a newline, or
jump when it is not. */
3213 static void check_newlinechar(compiler_common
*common
, int nltype
, jump_list
**backtracks
, BOOL jumpifmatch
)
3215 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3217 struct sljit_jump
*jump
;
3219 if (nltype
== NLTYPE_ANY
)
/* Fast call into the routine collected in common->anynewline; its answer is
then consumed through the zero flag. */
3221 add_jump(compiler
, &common
->anynewline
, JUMP(SLJIT_FAST_CALL
));
3222 sljit_set_current_flags(compiler
, SLJIT_SET_Z
);
3223 add_jump(compiler
, backtracks
, JUMP(jumpifmatch
? SLJIT_NOT_ZERO
: SLJIT_ZERO
));
3225 else if (nltype
== NLTYPE_ANYCRLF
)
/* Jump to backtracks when TMP1 is CR, and likewise when it is NL. */
3229 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_CR
));
3230 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_NL
));
/* Opposite polarity: CR leaves through the local jump, and any remaining
character that is not NL jumps to backtracks. */
3234 jump
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_CR
);
3235 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_NL
));
/* Fixed single-unit newline: one comparison against common->newline. */
3241 SLJIT_ASSERT(nltype
== NLTYPE_FIXED
&& common
->newline
< 256);
3242 add_jump(compiler
, backtracks
, CMP(jumpifmatch
? SLJIT_EQUAL
: SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, common
->newline
));
3248 #if defined COMPILE_PCRE8
/* Emits the fast-call routine that decodes a 2, 3 or 4 byte UTF-8 sequence.
The lead byte is already in TMP1 and STR_PTR addresses the byte after it. The
sequence length is found from marker bits of the lead byte that survive the
first shift (0x800 and 0x10000). Each exit sets TMP2 to the length and moves
STR_PTR past the sequence. */
3249 static void do_utfreadchar(compiler_common
*common
)
3251 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3252 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3254 struct sljit_jump
*jump
;
3256 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
/* TMP1 = ((lead & 0x3f) << 6) | (byte[0] & 0x3f). */
3257 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3258 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3f);
3259 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3260 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3261 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3263 /* Searching for the first zero. */
/* 0x800 is bit 0x20 of the lead byte after the shift by 6; when it is clear
the sequence has two bytes. */
3264 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x800);
3265 jump
= JUMP(SLJIT_NOT_ZERO
);
3266 /* Two byte sequence. */
3267 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3268 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, IN_UCHARS(2));
3269 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Longer sequence: clear the 0x800 marker with XOR, then merge the next
continuation byte the same way. */
3272 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
3273 OP2(SLJIT_XOR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x800);
3274 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3275 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3276 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
/* 0x10000 is bit 0x10 of the lead byte after two shifts by 6. */
3278 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x10000);
3279 jump
= JUMP(SLJIT_NOT_ZERO
);
3280 /* Three byte sequence. */
3281 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(2));
3282 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, IN_UCHARS(3));
3283 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
3285 /* Four byte sequence. */
3287 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(2));
3288 OP2(SLJIT_XOR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x10000);
3289 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3290 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(3));
3291 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3292 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3293 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, IN_UCHARS(4));
3294 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Emits a reduced variant of the UTF-8 decoder that only assembles the value
of two and three byte sequences. A lead byte with bit 0x10 set still moves
STR_PTR one extra byte forward, but no fourth byte is merged into the value.
Unlike do_utfreadchar, no length is returned in TMP2. */
3297 static void do_utfreadchar16(compiler_common
*common
)
3299 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3300 of the character (>= 0xc0). Return value in TMP1. */
3302 struct sljit_jump
*jump
;
3304 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
/* TMP1 = ((lead & 0x3f) << 6) | (byte[0] & 0x3f). */
3305 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3306 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3f);
3307 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3308 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3309 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3311 /* Searching for the first zero. */
3312 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x800);
3313 jump
= JUMP(SLJIT_NOT_ZERO
);
3314 /* Two byte sequence. */
3315 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3316 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* 0x400 is bit 0x10 of the lead byte after the shift by 6. The result of the
test is stored in TMP2 and added to STR_PTR, so a lead byte with that bit set
moves STR_PTR one additional byte forward. */
3319 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x400);
3320 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_NOT_ZERO
);
3321 /* This code runs only in 8 bit mode. No need to shift the value. */
3322 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
/* Merge the byte at offset 1 after clearing the 0x800 marker. */
3323 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
3324 OP2(SLJIT_XOR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x800);
3325 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 6);
3326 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x3f);
3327 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
3328 /* Three byte sequence. */
3329 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(2));
3330 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Emits the fast-call routine that returns the ctypes entry of a multi-byte
UTF-8 character whose lead byte is in TMP2. Only two byte sequences can encode
a value below 256, so only those reach the table; every other case returns 0
in TMP1. STR_PTR is moved past the trailing bytes on the paths that adjust it
below. */
3333 static void do_utfreadtype8(compiler_common
*common
)
3335 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3336 of the character (>= 0xc0). Return value in TMP1. */
3338 struct sljit_jump
*jump
;
3339 struct sljit_jump
*compare
;
3341 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
/* Bit 0x20 set in the lead byte means the sequence is longer than two bytes. */
3343 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP2
, 0, SLJIT_IMM
, 0x20);
3344 jump
= JUMP(SLJIT_NOT_ZERO
);
3345 /* Two byte sequence. */
3346 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3347 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3348 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 0x1f);
3349 /* The upper 5 bits are known at this point. */
/* A payload above 3 in the lead byte gives a value of 256 or more
((4 << 6) == 256), which the table cannot index. */
3350 compare
= CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, 0x3);
3351 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 6);
3352 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x3f);
3353 OP2(SLJIT_OR
, TMP2
, 0, TMP2
, 0, TMP1
, 0);
3354 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP2
), common
->ctypes
);
3355 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Out-of-table exit: type 0. */
3358 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, 0);
3359 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
3361 /* We only have types for characters less than 256. */
/* TMP2 = utf8_table4[lead - 0xc0], which is added to STR_PTR to step over the
remaining bytes (presumably the count of trailing bytes -- confirm against the
table definition). */
3363 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
3364 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, 0);
3365 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
3366 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
3369 #endif /* COMPILE_PCRE8 */
3371 #endif /* SUPPORT_UTF */
3375 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3376 #define UCD_BLOCK_MASK 127
3377 #define UCD_BLOCK_SHIFT 7
/* Emits the fast-call routine that maps the character in TMP1 to its Unicode
record through the two-stage tables: ucd_stage1 is indexed by the character
shifted right by UCD_BLOCK_SHIFT, and ucd_stage2 by that block number shifted
back plus the low UCD_BLOCK_MASK bits. The chartype field of the selected
record is loaded last. */
3379 static void do_getucd(compiler_common
*common
)
3381 /* Searches the UCD record for the character that comes in TMP1.
3382 Returns chartype in TMP1 and UCD offset in TMP2. */
3384 #ifdef COMPILE_PCRE32
3385 struct sljit_jump
*jump
;
3388 #if defined SLJIT_DEBUG && SLJIT_DEBUG
3389 /* dummy_ucd_record */
/* Debug-only check that the record used for INVALID_UTF_CHAR carries the
neutral property values asserted below. */
3390 const ucd_record
*record
= GET_UCD(INVALID_UTF_CHAR
);
3391 SLJIT_ASSERT(record
->script
== ucp_Common
&& record
->chartype
== ucp_Cn
&& record
->gbprop
== ucp_gbOther
);
3392 SLJIT_ASSERT(record
->caseset
== 0 && record
->other_case
== 0);
/* The shift amounts used below depend on these sizes. */
3395 SLJIT_ASSERT(UCD_BLOCK_SIZE
== 128 && sizeof(ucd_record
) == 8);
3397 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
3399 #ifdef COMPILE_PCRE32
/* Values of 0x110000 and above are replaced by INVALID_UTF_CHAR; smaller
values take the jump (presumably past the replacement -- confirm). */
3402 jump
= CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0x10ffff + 1);
3403 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, INVALID_UTF_CHAR
);
/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. */
3408 OP2(SLJIT_LSHR
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, UCD_BLOCK_SHIFT
);
3409 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), (sljit_sw
)PRIV(ucd_stage1
));
/* Stage 2 index: (block << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK). */
3410 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, UCD_BLOCK_MASK
);
3411 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, UCD_BLOCK_SHIFT
);
3412 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
/* The trailing 1 and 3 in the two SLJIT_MEM2 loads are presumably index
shifts (x2 for the 16 bit stage 2 entries, x8 for the 8 byte records asserted
above) -- confirm against the sljit documentation. */
3413 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, (sljit_sw
)PRIV(ucd_stage2
));
3414 OP1(SLJIT_MOV_U16
, TMP2
, 0, SLJIT_MEM2(TMP2
, TMP1
), 1);
3415 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)PRIV(ucd_records
) + SLJIT_OFFSETOF(ucd_record
, chartype
));
3416 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM2(TMP1
, TMP2
), 3);
3417 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Emits the entry code of the main matching loop and returns a label.
Two optional parts are visible here. When common->match_end_ptr is non-zero,
the end of the first line is searched and stored in that stack slot. When
newlinecheck is set, extra code handles the second unit of a newline. The
remainder advances STR_PTR by one character, including the trailing units of
UTF-8 / UTF-16 characters. */
3421 static SLJIT_INLINE
struct sljit_label
*mainloop_entry(compiler_common
*common
, BOOL hascrorlf
)
3424 struct sljit_label
*mainloop
;
3425 struct sljit_label
*newlinelabel
= NULL
;
3426 struct sljit_jump
*start
;
3427 struct sljit_jump
*end
= NULL
;
3428 struct sljit_jump
*end2
= NULL
;
3429 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3430 struct sljit_jump
*singlechar
;
3432 jump_list
*newline
= NULL
;
3433 BOOL newlinecheck
= FALSE
;
3434 BOOL readuchar
= FALSE
;
/* The newline check is enabled only when neither hascrorlf nor a first-line
limit is in effect, and the newline convention is ANY, ANYCRLF or a two-unit
fixed sequence (common->newline > 255). */
3436 if (!(hascrorlf
|| (common
->match_end_ptr
!= 0)) &&
3437 (common
->nltype
== NLTYPE_ANY
|| common
->nltype
== NLTYPE_ANYCRLF
|| common
->newline
> 255))
3438 newlinecheck
= TRUE
;
3440 if (common
->match_end_ptr
!= 0)
3442 /* Search for the end of the first line. */
/* STR_PTR is saved in TMP3 and restored from it once the search is done. */
3443 OP1(SLJIT_MOV
, TMP3
, 0, STR_PTR
, 0);
3445 if (common
->nltype
== NLTYPE_FIXED
&& common
->newline
> 255)
/* Two-unit fixed newline: advance one unit at a time and compare the unit
pair (STR_PTR[-1], STR_PTR[0]) against the high and low bytes of
common->newline, looping to mainloop on a mismatch. */
3448 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3449 end
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
3450 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-1));
3451 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
3452 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff, mainloop
);
3453 CMPTO(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, common
->newline
& 0xff, mainloop
);
/* Store STR_PTR minus one unit in the match_end_ptr slot. */
3455 OP2(SLJIT_SUB
, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3459 end
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
3461 /* Continual stores do not cause data dependency. */
/* General case: record the current position, read a character limited to
the nlmin..nlmax range and test it with check_newlinechar(); loop while
STR_PTR is below STR_END. */
3462 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
, STR_PTR
, 0);
3463 read_char_range(common
, common
->nlmin
, common
->nlmax
, TRUE
);
3464 check_newlinechar(common
, common
->nltype
, &newline
, TRUE
);
3465 CMPTO(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0, mainloop
);
3467 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
, STR_PTR
, 0);
/* Jumps collected by check_newlinechar() are bound to this point. */
3468 set_jumps(newline
, LABEL());
3471 OP1(SLJIT_MOV
, STR_PTR
, 0, TMP3
, 0);
3474 start
= JUMP(SLJIT_JUMP
);
/* Newline helper: step one unit; if the unit now at STR_PTR equals the low
byte of common->newline, step over it as well. The extra step is the result
of the equality test, scaled to the unit size in the 16/32 bit builds. */
3478 newlinelabel
= LABEL();
3479 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3480 end
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
3481 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
3482 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, common
->newline
& 0xff);
3483 OP_FLAGS(SLJIT_MOV
, TMP1
, 0, SLJIT_EQUAL
);
3484 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3485 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, UCHAR_SHIFT
);
3487 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
3488 end2
= JUMP(SLJIT_JUMP
);
3493 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit has to be loaded when either UTF handling or the newline
check needs to inspect it. */
3495 if (common
->utf
) readuchar
= TRUE
;
3497 if (newlinecheck
) readuchar
= TRUE
;
3500 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
/* A unit equal to the high byte of common->newline diverts to newlinelabel. */
3503 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff, newlinelabel
);
3505 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
3506 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3507 #if defined COMPILE_PCRE8
/* UTF-8: for a byte of 0xc0 or above, add utf8_table4[byte - 0xc0] to STR_PTR
to step over the rest of the sequence. */
3510 singlechar
= CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xc0);
3511 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
3512 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
3513 JUMPHERE(singlechar
);
3515 #elif defined COMPILE_PCRE16
/* UTF-16: units below 0xd800 are single. Otherwise (unit & 0xfc00) == 0xd800
identifies 0xd800..0xdbff and adds one more unit (two bytes) to STR_PTR. */
3518 singlechar
= CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xd800);
3519 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xfc00);
3520 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0xd800);
3521 OP_FLAGS(SLJIT_MOV
, TMP1
, 0, SLJIT_EQUAL
);
3522 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
3523 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
3524 JUMPHERE(singlechar
);
3526 #endif /* COMPILE_PCRE[8|16] */
3527 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3539 #define MAX_N_CHARS 16
3540 #define MAX_DIFF_CHARS 6
/* Records chr in the character group that chars points to. The visible code
scans entries chars[len] down to chars[1] for an existing copy of chr, and
treats len >= MAX_DIFF_CHARS - 1 as a separate case. NOTE(review): len is
presumably the count kept in chars[0] -- confirm where it is assigned. */
3542 static SLJIT_INLINE
void add_prefix_char(pcre_uchar chr
, pcre_uchar
*chars
)
/* Look for chr among the characters already stored (indices len..1). */
3557 for (i
= len
; i
> 0; i
--)
3558 if (chars
[i
] == chr
)
/* Group capacity check: a group has MAX_DIFF_CHARS slots including slot 0. */
3561 if (len
>= MAX_DIFF_CHARS
- 1)
/* Walks the compiled pattern starting at cc and records, for each leading
character position, the characters that may appear there. Each position owns
a group of MAX_DIFF_CHARS entries in chars, and the pointer is advanced by
MAX_DIFF_CHARS after every position. max_chars is the number of positions
still available and is decremented per position. rec_count is passed unchanged
to every recursive call and is tested against 0 on entry (presumably a
recursion budget -- confirm). The recursive calls assign their result back to
max_chars, and the UTF early exits return consumed. */
3572 static int scan_prefix(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*chars
, int max_chars
, sljit_u32
*rec_count
)
3574 /* Recursive function, which scans prefix literals. */
3575 BOOL last
, any
, class, caseless
;
3576 int len
, repeat
, len_save
, consumed
= 0;
3577 sljit_u32 chr
; /* Any unicode character. */
3578 sljit_u8
*bytes
, *bytes_end
, byte
;
3579 pcre_uchar
*alternative
, *cc_save
, *oc
;
/* othercase is sized per code-unit width: 8 units in the UTF-8 build, 2 in
the UTF-16 build, 1 otherwise. */
3580 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3581 pcre_uchar othercase
[8];
3582 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3583 pcre_uchar othercase
[2];
3585 pcre_uchar othercase
[1];
3591 if (*rec_count
== 0)
3612 case OP_NOT_WORD_BOUNDARY
:
3613 case OP_WORD_BOUNDARY
:
3620 /* Zero width assertions. */
/* The whole assertion group is stepped over with bracketend(). */
3627 case OP_ASSERTBACK_NOT
:
3628 cc
= bracketend(cc
);
/* Repeat count stored as a 2-unit immediate after the opcode. */
3644 repeat
= GET2(cc
, 1);
3646 cc
+= 1 + IMM2_SIZE
;
3659 if (common
->utf
&& HAS_EXTRALEN(*cc
)) len
+= GET_EXTRALEN(*cc
);
/* Continue scanning after this item with the same chars group. */
3661 max_chars
= scan_prefix(common
, cc
+ len
, chars
, max_chars
, rec_count
);
3668 cc
+= 1 + LINK_SIZE
;
/* Visit every alternative of the bracket: each OP_ALT is scanned from just
after its link field, then the link is followed to the next one. */
3681 alternative
= cc
+ GET(cc
, 1);
3682 while (*alternative
== OP_ALT
)
3684 max_chars
= scan_prefix(common
, alternative
+ 1 + LINK_SIZE
, chars
, max_chars
, rec_count
);
3687 alternative
+= GET(alternative
, 1);
3690 if (*cc
== OP_CBRA
|| *cc
== OP_CBRAPOS
)
3692 cc
+= 1 + LINK_SIZE
;
/* In UTF-8 mode the bitmaps below are checked with is_char7_bitset() before
they are used. */
3696 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3697 if (common
->utf
&& !is_char7_bitset((const sljit_u8
*)(cc
+ 1), FALSE
))
3704 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3705 if (common
->utf
) return consumed
;
3710 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3712 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3713 if (common
->utf
) return consumed
;
/* Digit, space and word bitmaps are addressed relative to common->ctypes
(ctypes - cbit_length + cbit_xxx). */
3721 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3722 if (common
->utf
&& !is_char7_bitset((const sljit_u8
*)common
->ctypes
- cbit_length
+ cbit_digit
, FALSE
))
3730 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3731 if (common
->utf
&& !is_char7_bitset((const sljit_u8
*)common
->ctypes
- cbit_length
+ cbit_space
, FALSE
))
3739 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3740 if (common
->utf
&& !is_char7_bitset((const sljit_u8
*)common
->ctypes
- cbit_length
+ cbit_word
, FALSE
))
3752 case OP_NOT_WHITESPACE
:
3753 case OP_NOT_WORDCHAR
:
3756 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3757 if (common
->utf
) return consumed
;
3766 #ifndef COMPILE_PCRE32
3767 if (common
->utf
) return consumed
;
3775 repeat
= GET2(cc
, 1);
3776 cc
+= 1 + IMM2_SIZE
;
3781 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3782 if (common
->utf
) return consumed
;
3785 repeat
= GET2(cc
, 1);
3786 cc
+= 1 + IMM2_SIZE
+ 1;
/* One position handled per iteration: consume a slot, move to the next group,
repeat while the repeat count lasts. */
3800 if (--max_chars
== 0)
3802 chars
+= MAX_DIFF_CHARS
;
3804 while (--repeat
> 0);
/* Character class: the 32-byte bitmap follows the opcode. */
3812 bytes
= (sljit_u8
*) (cc
+ 1);
3813 cc
+= 1 + 32 / sizeof(pcre_uchar
);
3823 max_chars
= scan_prefix(common
, cc
+ 1, chars
, max_chars
, rec_count
);
3837 repeat
= GET2(cc
, 1);
/* bytes[31] & 0x80 is the bitmap bit for character 255. */
3845 if (bytes
[31] & 0x80)
3847 else if (chars
[0] != 255)
3849 bytes_end
= bytes
+ 32;
/* chr is the character of bit 0 of the current bitmap byte, hence a multiple
of 8. Each set bit adds its character to the group. */
3854 SLJIT_ASSERT((chr
& 0x7) == 0);
3861 if ((byte
& 0x1) != 0)
3862 add_prefix_char(chr
, chars
);
/* Round chr up to the first character of the next bitmap byte. */
3867 chr
= (chr
+ 7) & ~7;
/* Stop early once the group is marked 255. */
3870 while (chars
[0] != 255 && bytes
< bytes_end
);
/* Rewind to the start of the bitmap for the next repetition. */
3871 bytes
= bytes_end
- 32;
3875 if (--max_chars
== 0)
3877 chars
+= MAX_DIFF_CHARS
;
3879 while (--repeat
> 0);
/* The two GET2 values read here are compared for inequality (presumably the
min and max of a range repeat -- confirm). */
3897 if (GET2(cc
, 1) != GET2(cc
, 1 + IMM2_SIZE
))
3899 cc
+= 1 + 2 * IMM2_SIZE
;
3909 if (common
->utf
&& HAS_EXTRALEN(*cc
)) len
+= GET_EXTRALEN(*cc
);
3912 if (caseless
&& char_has_othercase(common
, cc
))
/* In UTF mode the other case is re-encoded with ord2utf() and its length is
compared with len. */
3918 if ((int)PRIV(ord2utf
)(char_othercase(common
, chr
), othercase
) != len
)
/* Non-UTF path: the other case comes from the common->fcc table. */
3925 othercase
[0] = TABLE_GET(chr
, common
->fcc
, chr
);
3931 othercase
[0] = 0; /* Stops compiler warning - PH */
/* Record the literal unit, and the unit from oc as well. */
3942 add_prefix_char(*cc
, chars
);
3945 add_prefix_char(*oc
, chars
);
3949 if (--max_chars
== 0)
3951 chars
+= MAX_DIFF_CHARS
;
3970 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
/* Replicates one code unit across a 32 bit value: four copies in the 8 bit
build, two copies in the 16 bit build. As a side effect this function's body
also defines SSE2_COMPARE_TYPE_INDEX (0, 1 or 2 for the 8, 16 and 32 bit
builds); later code in this file adds it to 0x74 to form the PCMPEQB/W/D
opcode. */
3972 static sljit_s32
character_to_int32(pcre_uchar chr
)
3974 sljit_s32 value
= (sljit_s32
)chr
;
3975 #if defined COMPILE_PCRE8
3976 #define SSE2_COMPARE_TYPE_INDEX 0
/* The shifts are done on unsigned int so that moving a bit into the top
position is well defined. */
3977 return ((unsigned int)value
<< 24) | ((unsigned int)value
<< 16) | ((unsigned int)value
<< 8) | (unsigned int)value
;
3978 #elif defined COMPILE_PCRE16
3979 #define SSE2_COMPARE_TYPE_INDEX 1
3980 return ((unsigned int)value
<< 16) | value
;
3981 #elif defined COMPILE_PCRE32
3982 #define SSE2_COMPARE_TYPE_INDEX 2
/* Any other unit width is rejected at compile time. */
3985 #error "Unsupported unit width"
/* Emits an SSE2 search that moves STR_PTR forward to the first code unit
equal to char1 or char2, 16 bytes at a time. The machine instructions are
emitted as raw bytes through sljit_emit_op_custom(). The code has two parts:
an unaligned start, where STR_PTR is rounded down to a 16 byte boundary and
the match bits of the bytes before the original position are shifted out, and
an aligned loop over the following 16 byte blocks.
When char1 and char2 differ in a single bit, one compare is enough: the data
is ORed with that bit and compared with char1 | bit. Otherwise both characters
are compared and the two result masks are ORed (load_twice). */
3989 static SLJIT_INLINE
void fast_forward_first_char2_sse2(compiler_common
*common
, pcre_uchar char1
, pcre_uchar char2
)
3992 struct sljit_label
*start
;
3993 struct sljit_jump
*quit
[3];
3994 struct sljit_jump
*nomatch
;
3995 sljit_u8 instruction
[8];
/* Machine register numbers, needed to build the ModRM bytes by hand. */
3996 sljit_s32 tmp1_ind
= sljit_get_register_index(TMP1
);
3997 sljit_s32 tmp2_ind
= sljit_get_register_index(TMP2
);
3998 sljit_s32 str_ptr_ind
= sljit_get_register_index(STR_PTR
);
3999 BOOL load_twice
= FALSE
;
/* bit holds the differing bits of the two characters. */
4002 bit
= char1
^ char2
;
4003 if (!is_powerof2(bit
))
4006 if ((char1
!= char2
) && bit
== 0)
4009 quit
[0] = CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4011 /* First part (unaligned start) */
4013 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, character_to_int32(char1
| bit
));
4015 SLJIT_ASSERT(tmp1_ind
< 8 && tmp2_ind
== 1);
4017 /* MOVD xmm, r/m32 */
/* ModRM: 0xc0 selects register-direct mode, (2 << 3) puts xmm2 in the reg
field, and the low bits name the general register. */
4018 instruction
[0] = 0x66;
4019 instruction
[1] = 0x0f;
4020 instruction
[2] = 0x6e;
4021 instruction
[3] = 0xc0 | (2 << 3) | tmp1_ind
;
4022 sljit_emit_op_custom(compiler
, instruction
, 4);
/* Second constant: the single differing bit, or char2 when two compares are
needed. It goes to xmm3. */
4026 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, character_to_int32(bit
!= 0 ? bit
: char2
));
4028 /* MOVD xmm, r/m32 */
4029 instruction
[3] = 0xc0 | (3 << 3) | tmp1_ind
;
4030 sljit_emit_op_custom(compiler
, instruction
, 4);
4033 /* PSHUFD xmm1, xmm2/m128, imm8 */
/* Broadcast the low dword of xmm2 (and of xmm3 below) to all four lanes
(presumably with a zero imm8 in instruction[4] -- confirm). */
4034 instruction
[2] = 0x70;
4035 instruction
[3] = 0xc0 | (2 << 3) | 2;
4037 sljit_emit_op_custom(compiler
, instruction
, 5);
4041 /* PSHUFD xmm1, xmm2/m128, imm8 */
4042 instruction
[3] = 0xc0 | (3 << 3) | 3;
4044 sljit_emit_op_custom(compiler
, instruction
, 5);
/* TMP2 = misalignment of STR_PTR (0..15); STR_PTR is rounded down to a
16 byte boundary so the aligned load below is legal. */
4047 OP2(SLJIT_AND
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, 0xf);
4048 OP2(SLJIT_AND
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, ~0xf);
4050 /* MOVDQA xmm1, xmm2/m128 */
4051 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Registers 0..7 need no prefix. Higher registers get a 0x41 (REX.B) byte
between the 0x66 prefix and the opcode, which makes the instruction 5 bytes. */
4053 if (str_ptr_ind
< 8)
4055 instruction
[2] = 0x6f;
4056 instruction
[3] = (0 << 3) | str_ptr_ind
;
4057 sljit_emit_op_custom(compiler
, instruction
, 4);
/* Second load into xmm1, used by the two-compare variant. */
4061 instruction
[3] = (1 << 3) | str_ptr_ind
;
4062 sljit_emit_op_custom(compiler
, instruction
, 4);
4067 instruction
[1] = 0x41;
4068 instruction
[2] = 0x0f;
4069 instruction
[3] = 0x6f;
4070 instruction
[4] = (0 << 3) | (str_ptr_ind
& 0x7);
4071 sljit_emit_op_custom(compiler
, instruction
, 5);
4075 instruction
[4] = (1 << 3) | str_ptr_ind
;
4076 sljit_emit_op_custom(compiler
, instruction
, 5);
/* Restore the two-byte opcode escape for the instructions that follow. */
4078 instruction
[1] = 0x0f;
4083 instruction
[2] = 0x6f;
4084 instruction
[3] = (0 << 3) | str_ptr_ind
;
4085 sljit_emit_op_custom(compiler
, instruction
, 4);
4089 instruction
[3] = (1 << 3) | str_ptr_ind
;
4090 sljit_emit_op_custom(compiler
, instruction
, 4);
4097 /* POR xmm1, xmm2/m128 */
/* xmm0 |= xmm3: force the differing bit on in every loaded unit. */
4098 instruction
[2] = 0xeb;
4099 instruction
[3] = 0xc0 | (0 << 3) | 3;
4100 sljit_emit_op_custom(compiler
, instruction
, 4);
4103 /* PCMPEQB/W/D xmm1, xmm2/m128 */
/* Compare xmm0 with the broadcast first constant in xmm2; the opcode is
chosen by unit width through SSE2_COMPARE_TYPE_INDEX. */
4104 instruction
[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX
;
4105 instruction
[3] = 0xc0 | (0 << 3) | 2;
4106 sljit_emit_op_custom(compiler
, instruction
, 4);
/* Second compare: xmm1 against xmm3. */
4110 instruction
[3] = 0xc0 | (1 << 3) | 3;
4111 sljit_emit_op_custom(compiler
, instruction
, 4);
4114 /* PMOVMSKB reg, xmm */
/* Collect one bit per byte of the compare result into TMP1. */
4115 instruction
[2] = 0xd7;
4116 instruction
[3] = 0xc0 | (tmp1_ind
<< 3) | 0;
4117 sljit_emit_op_custom(compiler
, instruction
, 4);
/* TMP2 (the misalignment) is parked in RETURN_ADDR while TMP2 receives the
second mask, then restored after the masks are merged. */
4121 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, TMP2
, 0);
4122 instruction
[3] = 0xc0 | (tmp2_ind
<< 3) | 1;
4123 sljit_emit_op_custom(compiler
, instruction
, 4);
4125 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
4126 OP1(SLJIT_MOV
, TMP2
, 0, RETURN_ADDR
, 0);
/* Drop the match bits that belong to bytes before the original STR_PTR. */
4129 OP2(SLJIT_ASHR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
4131 /* BSF r32, r/m32 */
/* Index of the lowest set bit; the zero flag reports an empty mask. */
4132 instruction
[0] = 0x0f;
4133 instruction
[1] = 0xbc;
4134 instruction
[2] = 0xc0 | (tmp1_ind
<< 3) | tmp1_ind
;
4135 sljit_emit_op_custom(compiler
, instruction
, 3);
4136 sljit_set_current_flags(compiler
, SLJIT_SET_Z
);
4138 nomatch
= JUMP(SLJIT_ZERO
);
/* Match in the first block: STR_PTR = aligned base + misalignment + index. */
4140 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
4141 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
4142 quit
[1] = JUMP(SLJIT_JUMP
);
/* Advance to the next 16 byte block; leave when STR_END is reached. */
4147 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, 16);
4148 quit
[2] = CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4150 /* Second part (aligned) */
/* The BSF encoding overwrote the first two bytes; restore the SSE2 prefix. */
4152 instruction
[0] = 0x66;
4153 instruction
[1] = 0x0f;
4155 /* MOVDQA xmm1, xmm2/m128 */
4156 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4158 if (str_ptr_ind
< 8)
4160 instruction
[2] = 0x6f;
4161 instruction
[3] = (0 << 3) | str_ptr_ind
;
4162 sljit_emit_op_custom(compiler
, instruction
, 4);
4166 instruction
[3] = (1 << 3) | str_ptr_ind
;
4167 sljit_emit_op_custom(compiler
, instruction
, 4);
4172 instruction
[1] = 0x41;
4173 instruction
[2] = 0x0f;
4174 instruction
[3] = 0x6f;
4175 instruction
[4] = (0 << 3) | (str_ptr_ind
& 0x7);
4176 sljit_emit_op_custom(compiler
, instruction
, 5);
4180 instruction
[4] = (1 << 3) | str_ptr_ind
;
4181 sljit_emit_op_custom(compiler
, instruction
, 5);
4183 instruction
[1] = 0x0f;
4188 instruction
[2] = 0x6f;
4189 instruction
[3] = (0 << 3) | str_ptr_ind
;
4190 sljit_emit_op_custom(compiler
, instruction
, 4);
4194 instruction
[3] = (1 << 3) | str_ptr_ind
;
4195 sljit_emit_op_custom(compiler
, instruction
, 4);
4202 /* POR xmm1, xmm2/m128 */
4203 instruction
[2] = 0xeb;
4204 instruction
[3] = 0xc0 | (0 << 3) | 3;
4205 sljit_emit_op_custom(compiler
, instruction
, 4);
4208 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4209 instruction
[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX
;
4210 instruction
[3] = 0xc0 | (0 << 3) | 2;
4211 sljit_emit_op_custom(compiler
, instruction
, 4);
4215 instruction
[3] = 0xc0 | (1 << 3) | 3;
4216 sljit_emit_op_custom(compiler
, instruction
, 4);
4219 /* PMOVMSKB reg, xmm */
4220 instruction
[2] = 0xd7;
4221 instruction
[3] = 0xc0 | (tmp1_ind
<< 3) | 0;
4222 sljit_emit_op_custom(compiler
, instruction
, 4);
4226 instruction
[3] = 0xc0 | (tmp2_ind
<< 3) | 1;
4227 sljit_emit_op_custom(compiler
, instruction
, 4);
4229 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
4232 /* BSF r32, r/m32 */
4233 instruction
[0] = 0x0f;
4234 instruction
[1] = 0xbc;
4235 instruction
[2] = 0xc0 | (tmp1_ind
<< 3) | tmp1_ind
;
4236 sljit_emit_op_custom(compiler
, instruction
, 3);
4237 sljit_set_current_flags(compiler
, SLJIT_SET_Z
);
/* No match in this block: go round the aligned loop again. */
4239 JUMPTO(SLJIT_ZERO
, start
);
/* Match: the block is aligned, so only the bit index is added. */
4241 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
/* All three exits are bound to the same label. */
4244 SET_LABEL(quit
[0], start
);
4245 SET_LABEL(quit
[1], start
);
4246 SET_LABEL(quit
[2], start
);
4249 #undef SSE2_COMPARE_TYPE_INDEX
/* Emits code that moves STR_PTR forward until the code unit at position
STR_PTR + offset is char1 or char2. STR_PTR is shifted by offset before the
search and shifted back afterwards. When common->match_end_ptr is non-zero,
STR_END is temporarily replaced by a limit derived from that stack slot and
restored from TMP3 at the end. In the 8 and 16 bit UTF builds with offset > 0,
a hit is rejected and the search restarted (utf_start) when the unit at the
candidate start position is a UTF-8 continuation byte or a UTF-16 low
surrogate. */
4253 static void fast_forward_first_char2(compiler_common
*common
, pcre_uchar char1
, pcre_uchar char2
, sljit_s32 offset
)
4256 struct sljit_label
*start
;
4257 struct sljit_jump
*quit
;
4258 struct sljit_jump
*found
;
4260 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4261 struct sljit_label
*utf_start
= NULL
;
4262 struct sljit_jump
*utf_quit
= NULL
;
4264 BOOL has_match_end
= (common
->match_end_ptr
!= 0);
/* Search at the position of the wanted character rather than at the start of
the match. */
4267 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(offset
));
/* Save the real STR_END in TMP3, then set STR_END to the smaller of that
value and (stored first-line end + offset + 1 units). */
4271 OP1(SLJIT_MOV
, TMP3
, 0, STR_END
, 0);
4273 OP2(SLJIT_ADD
, STR_END
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
, SLJIT_IMM
, IN_UCHARS(offset
+ 1));
4274 OP2(SLJIT_SUB
| SLJIT_SET_GREATER
, SLJIT_UNUSED
, 0, STR_END
, 0, TMP3
, 0);
4275 sljit_emit_cmov(compiler
, SLJIT_GREATER
, STR_END
, TMP3
, 0);
4278 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4279 if (common
->utf
&& offset
> 0)
4280 utf_start
= LABEL();
4283 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
4285 /* SSE2 accelerated first character search. */
4287 if (sljit_has_cpu_feature(SLJIT_HAS_SSE2
))
4289 fast_forward_first_char2_sse2(common
, char1
, char2
);
/* A non-zero offset is only expected in JIT_COMPILE mode. */
4291 SLJIT_ASSERT(common
->mode
== JIT_COMPILE
|| offset
== 0);
4292 if (common
->mode
== JIT_COMPILE
)
4294 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4295 SLJIT_ASSERT(common
->forced_quit_label
== NULL
);
/* Preload the NOMATCH result and leave through forced_quit when the search
ran to the end. */
4296 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, PCRE_ERROR_NOMATCH
);
4297 add_jump(compiler
, &common
->forced_quit
, CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0));
4299 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4300 if (common
->utf
&& offset
> 0)
4302 SLJIT_ASSERT(common
->mode
== JIT_COMPILE
);
/* Inspect the unit at the candidate start (STR_PTR - offset). STR_PTR is
moved one unit forward first, so a rejected hit resumes the search after it;
the step is undone below when the hit is kept. */
4304 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-offset
));
4305 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4306 #if defined COMPILE_PCRE8
4307 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xc0);
4308 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0x80, utf_start
);
4309 #elif defined COMPILE_PCRE16
4310 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xfc00);
4311 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0xdc00, utf_start
);
4313 #error "Unknown code width"
4315 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* Undo the initial offset shift. */
4320 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(offset
));
/* If STR_PTR reached the (possibly reduced) STR_END, replace it with either
the stored first-line end or STR_END, using conditional moves. */
4324 OP2(SLJIT_SUB
| SLJIT_SET_GREATER_EQUAL
, SLJIT_UNUSED
, 0, STR_PTR
, 0, STR_END
, 0);
4327 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
);
4328 sljit_emit_cmov(compiler
, SLJIT_GREATER_EQUAL
, STR_PTR
, TMP1
, 0);
4331 sljit_emit_cmov(compiler
, SLJIT_GREATER_EQUAL
, STR_PTR
, STR_END
, 0);
/* Put the real subject end back. */
4335 OP1(SLJIT_MOV
, STR_END
, 0, TMP3
, 0);
/* Scalar search: load one unit per iteration and test it. */
4341 quit
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4344 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
4347 found
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, char1
);
/* When the two characters differ in one bit, OR that bit into the loaded
unit and compare once with char1 | mask. */
4350 mask
= char1
^ char2
;
4351 if (is_powerof2(mask
))
4353 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, mask
);
4354 found
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, char1
| mask
);
/* Otherwise test both characters and combine the two equality results in
TMP2; a non-zero result means a hit. */
4358 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, char1
);
4359 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
4360 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, char2
);
4361 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_EQUAL
);
4362 found
= JUMP(SLJIT_NOT_ZERO
);
/* No hit: next unit, loop while below STR_END. */
4366 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4367 CMPTO(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0, start
);
4369 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4370 if (common
->utf
&& offset
> 0)
4371 utf_quit
= JUMP(SLJIT_JUMP
);
4376 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4377 if (common
->utf
&& offset
> 0)
/* Same candidate-start check as in the SSE2 path above. */
4379 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-offset
));
4380 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4381 #if defined COMPILE_PCRE8
4382 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xc0);
4383 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0x80, utf_start
);
4384 #elif defined COMPILE_PCRE16
4385 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xfc00);
4386 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0xdc00, utf_start
);
4388 #error "Unknown code width"
4390 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* When the search ended at the reduced STR_END, continue from the stored
first-line end (re-shifted by offset); then restore the real STR_END. */
4399 quit
= CMP(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0);
4400 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
);
4402 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(offset
));
4404 OP1(SLJIT_MOV
, STR_END
, 0, TMP3
, 0);
/* Undo the initial offset shift. */
4408 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(offset
));
4411 static SLJIT_INLINE BOOL
fast_forward_first_n_chars(compiler_common
*common
)
4414 struct sljit_label
*start
;
4415 struct sljit_jump
*quit
;
4416 struct sljit_jump
*match
;
4417 /* chars[i * MAX_DIFF_CHARS] holds the number of different characters recorded
4418 for position i; 255 represents any character. */
4419 pcre_uchar chars
[MAX_N_CHARS
* MAX_DIFF_CHARS
];
4422 pcre_uchar
*char_set
, *char_set_end
;
4424 int range_right
= -1, range_len
;
4425 sljit_u8
*update_table
= NULL
;
4427 sljit_u32 rec_count
;
4429 for (i
= 0; i
< MAX_N_CHARS
; i
++)
4430 chars
[i
* MAX_DIFF_CHARS
] = 0;
4433 max
= scan_prefix(common
, common
->start
, chars
, MAX_N_CHARS
, &rec_count
);
4439 /* Prevent compiler "uninitialized" warning */
4441 range_len
= 4 /* minimum length */ - 1;
4442 for (i
= 0; i
<= max
; i
++)
4444 if (in_range
&& (i
- from
) > range_len
&& (chars
[(i
- 1) * MAX_DIFF_CHARS
] < 255))
4446 range_len
= i
- from
;
4447 range_right
= i
- 1;
4450 if (i
< max
&& chars
[i
* MAX_DIFF_CHARS
] < 255)
4452 SLJIT_ASSERT(chars
[i
* MAX_DIFF_CHARS
] > 0);
4463 if (range_right
>= 0)
4465 update_table
= (sljit_u8
*)allocate_read_only_data(common
, 256);
4466 if (update_table
== NULL
)
4468 memset(update_table
, IN_UCHARS(range_len
), 256);
4470 for (i
= 0; i
< range_len
; i
++)
4472 char_set
= chars
+ ((range_right
- i
) * MAX_DIFF_CHARS
);
4473 SLJIT_ASSERT(char_set
[0] > 0 && char_set
[0] < 255);
4474 char_set_end
= char_set
+ char_set
[0];
4476 while (char_set
<= char_set_end
)
4478 if (update_table
[(*char_set
) & 0xff] > IN_UCHARS(i
))
4479 update_table
[(*char_set
) & 0xff] = IN_UCHARS(i
);
4487 for (i
= 0; i
< max
; i
++)
4491 if (chars
[i
* MAX_DIFF_CHARS
] <= 2)
4494 else if (chars
[offset
* MAX_DIFF_CHARS
] == 2 && chars
[i
* MAX_DIFF_CHARS
] <= 2)
4496 if (chars
[i
* MAX_DIFF_CHARS
] == 1)
4500 mask
= chars
[offset
* MAX_DIFF_CHARS
+ 1] ^ chars
[offset
* MAX_DIFF_CHARS
+ 2];
4501 if (!is_powerof2(mask
))
4503 mask
= chars
[i
* MAX_DIFF_CHARS
+ 1] ^ chars
[i
* MAX_DIFF_CHARS
+ 2];
4504 if (is_powerof2(mask
))
4511 if (range_right
< 0)
4515 SLJIT_ASSERT(chars
[offset
* MAX_DIFF_CHARS
] >= 1 && chars
[offset
* MAX_DIFF_CHARS
] <= 2);
4516 /* Works regardless the value is 1 or 2. */
4517 mask
= chars
[offset
* MAX_DIFF_CHARS
+ chars
[offset
* MAX_DIFF_CHARS
]];
4518 fast_forward_first_char2(common
, chars
[offset
* MAX_DIFF_CHARS
+ 1], mask
, offset
);
4522 if (range_right
== offset
)
4525 SLJIT_ASSERT(offset
== -1 || (chars
[offset
* MAX_DIFF_CHARS
] >= 1 && chars
[offset
* MAX_DIFF_CHARS
] <= 2));
4528 SLJIT_ASSERT(max
> 0);
4529 if (common
->match_end_ptr
!= 0)
4531 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
);
4532 OP1(SLJIT_MOV
, TMP3
, 0, STR_END
, 0);
4533 OP2(SLJIT_SUB
, STR_END
, 0, STR_END
, 0, SLJIT_IMM
, IN_UCHARS(max
));
4534 quit
= CMP(SLJIT_LESS_EQUAL
, STR_END
, 0, TMP1
, 0);
4535 OP1(SLJIT_MOV
, STR_END
, 0, TMP1
, 0);
4539 OP2(SLJIT_SUB
, STR_END
, 0, STR_END
, 0, SLJIT_IMM
, IN_UCHARS(max
));
4541 SLJIT_ASSERT(range_right
>= 0);
4543 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4544 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, SLJIT_IMM
, (sljit_sw
)update_table
);
4548 quit
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4550 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4551 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(range_right
));
4553 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(range_right
+ 1) - 1);
4556 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4557 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM2(RETURN_ADDR
, TMP1
), 0);
4559 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)update_table
);
4561 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
4562 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0, start
);
4566 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(offset
));
4567 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4569 if (chars
[offset
* MAX_DIFF_CHARS
] == 1)
4570 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, chars
[offset
* MAX_DIFF_CHARS
+ 1], start
);
4573 mask
= chars
[offset
* MAX_DIFF_CHARS
+ 1] ^ chars
[offset
* MAX_DIFF_CHARS
+ 2];
4574 if (is_powerof2(mask
))
4576 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, mask
);
4577 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, chars
[offset
* MAX_DIFF_CHARS
+ 1] | mask
, start
);
4581 match
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, chars
[offset
* MAX_DIFF_CHARS
+ 1]);
4582 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, chars
[offset
* MAX_DIFF_CHARS
+ 2], start
);
4588 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4589 if (common
->utf
&& offset
!= 0)
4593 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
4594 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4597 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-1));
4598 #if defined COMPILE_PCRE8
4599 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xc0);
4600 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0x80, start
);
4601 #elif defined COMPILE_PCRE16
4602 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xfc00);
4603 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0xdc00, start
);
4605 #error "Unknown code width"
4608 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4613 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4617 if (common
->match_end_ptr
!= 0)
4619 if (range_right
>= 0)
4620 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
);
4621 OP1(SLJIT_MOV
, STR_END
, 0, TMP3
, 0);
4622 if (range_right
>= 0)
4624 quit
= CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, TMP1
, 0);
4625 OP1(SLJIT_MOV
, STR_PTR
, 0, TMP1
, 0);
4630 OP2(SLJIT_ADD
, STR_END
, 0, STR_END
, 0, SLJIT_IMM
, IN_UCHARS(max
));
4635 #undef MAX_DIFF_CHARS
/* Fast-forward on a single known first character. The other-case form is
looked up in common->fcc (or with UCD_OTHERCASE for characters above 127 in
UTF mode, when built with UCP support and not as the 8 bit library), and
both forms are passed to fast_forward_first_char2() with offset 0.
NOTE(review): the statement that tests `caseless` is not visible in this
extract. */
4637 static SLJIT_INLINE
void fast_forward_first_char(compiler_common
*common
, pcre_uchar first_char
, BOOL caseless
)
4644 oc
= TABLE_GET(first_char
, common
->fcc
, first_char
);
4645 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4646 if (first_char
> 127 && common
->utf
)
4647 oc
= UCD_OTHERCASE(first_char
);
4651 fast_forward_first_char2(common
, first_char
, oc
, 0);
/* Emits code that advances STR_PTR until the characters before it form a
newline. Two variants are generated: a dedicated loop for a fixed two
character newline (common->newline > 255) and a generic loop built on
read_char_range() / check_newlinechar() for everything else.
NOTE(review): braces, label definitions and several JUMPHERE calls are
missing from this extract, so the exact control flow cannot be read here. */
4654 static SLJIT_INLINE
void fast_forward_newline(compiler_common
*common
)
4657 struct sljit_label
*loop
;
4658 struct sljit_jump
*lastchar
;
4659 struct sljit_jump
*firstchar
;
4660 struct sljit_jump
*quit
;
4661 struct sljit_jump
*foundcr
= NULL
;
4662 struct sljit_jump
*notfoundnl
;
4663 jump_list
*newline
= NULL
;
/* When match_end_ptr is in use, scan only up to that position and keep
the real STR_END in TMP3. */
4665 if (common
->match_end_ptr
!= 0)
4667 OP1(SLJIT_MOV
, TMP3
, 0, STR_END
, 0);
4668 OP1(SLJIT_MOV
, STR_END
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
);
/* Fixed newline made of two characters: high byte first, low byte second. */
4671 if (common
->nltype
== NLTYPE_FIXED
&& common
->newline
> 255)
4673 lastchar
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4674 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
4675 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, str
));
4676 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, begin
));
4677 firstchar
= CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, TMP2
, 0);
/* TMP2 receives the flag result of STR_PTR >= begin + 2 characters,
scaled to a character width on 16/32 bit builds, and is subtracted from
STR_PTR before the loop. */
4679 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, IN_UCHARS(2));
4680 OP2(SLJIT_SUB
| SLJIT_SET_GREATER_EQUAL
, SLJIT_UNUSED
, 0, STR_PTR
, 0, TMP1
, 0);
4681 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_GREATER_EQUAL
);
4682 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4683 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, UCHAR_SHIFT
);
4685 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
/* Loop body: advance one character and compare the two characters that
precede STR_PTR with the two newline characters. */
4688 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4689 quit
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4690 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-2));
4691 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-1));
4692 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff, loop
);
4693 CMPTO(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, common
->newline
& 0xff, loop
);
4696 JUMPHERE(firstchar
);
4699 if (common
->match_end_ptr
!= 0)
4700 OP1(SLJIT_MOV
, STR_END
, 0, TMP3
, 0);
/* Generic variant: nothing to do when STR_PTR is at or before the `str`
field of the arguments; otherwise step back one character first. */
4704 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
4705 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, str
));
4706 firstchar
= CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, TMP2
, 0);
4707 skip_char_back(common
);
/* The loop label is also published in common->ff_newline_shortcut. */
4710 common
->ff_newline_shortcut
= loop
;
4712 read_char_range(common
, common
->nlmin
, common
->nlmax
, TRUE
);
4713 lastchar
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4714 if (common
->nltype
== NLTYPE_ANY
|| common
->nltype
== NLTYPE_ANYCRLF
)
4715 foundcr
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_CR
);
4716 check_newlinechar(common
, common
->nltype
, &newline
, FALSE
);
4717 set_jumps(newline
, loop
);
/* For ANY / ANYCRLF: if the next character equals CHAR_NL, STR_PTR is
advanced by one more character (the flag result is scaled to a character
width on 16/32 bit builds). */
4719 if (common
->nltype
== NLTYPE_ANY
|| common
->nltype
== NLTYPE_ANYCRLF
)
4721 quit
= JUMP(SLJIT_JUMP
);
4723 notfoundnl
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4724 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
4725 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, CHAR_NL
);
4726 OP_FLAGS(SLJIT_MOV
, TMP1
, 0, SLJIT_EQUAL
);
4727 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4728 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, UCHAR_SHIFT
);
4730 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
4731 JUMPHERE(notfoundnl
);
4735 JUMPHERE(firstchar
);
/* Restore the real STR_END saved at entry. */
4737 if (common
->match_end_ptr
!= 0)
4738 OP1(SLJIT_MOV
, STR_END
, 0, TMP3
, 0);
/* Forward declaration: fast_forward_start_bits() below calls this function
before its definition appears in the file. */
4741 static BOOL
check_class_ranges(compiler_common
*common
, const sljit_u8
*bits
, BOOL nclass
, BOOL invert
, jump_list
**backtracks
);
/* Emits a loop that advances STR_PTR until the current character is one of
those enabled in the start_bits bitmap. check_class_ranges() is tried
first; when it returns FALSE a direct bit test against the bitmap is
generated instead.
NOTE(review): braces, label definitions and some conditionals are missing
from this extract. */
4743 static SLJIT_INLINE
void fast_forward_start_bits(compiler_common
*common
, const sljit_u8
*start_bits
)
4746 struct sljit_label
*start
;
4747 struct sljit_jump
*quit
;
4748 struct sljit_jump
*found
= NULL
;
4749 jump_list
*matches
= NULL
;
4750 #ifndef COMPILE_PCRE8
4751 struct sljit_jump
*jump
;
/* When match_end_ptr is in use, scan only up to it; the real STR_END is
parked in RETURN_ADDR and restored at the end. */
4754 if (common
->match_end_ptr
!= 0)
4756 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, STR_END
, 0);
4757 OP1(SLJIT_MOV
, STR_END
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
);
4761 quit
= CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
4762 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
/* Keep a copy of the character in TMP3; TMP1 is overwritten by the tests. */
4765 OP1(SLJIT_MOV
, TMP3
, 0, TMP1
, 0);
/* The nclass argument is taken from the top bit of the last bitmap byte. */
4768 if (!check_class_ranges(common
, start_bits
, (start_bits
[31] & 0x80) != 0, TRUE
, &matches
))
/* Wider code units: the character value is replaced by 255 before the
table lookup (the CMP that guards this is resolved outside the extract). */
4770 #ifndef COMPILE_PCRE8
4771 jump
= CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 255);
4772 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, 255);
/* Bitmap test: byte index = c >> 3, bit index = c & 7. */
4775 OP2(SLJIT_AND
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0x7);
4776 OP2(SLJIT_LSHR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 3);
4777 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)start_bits
);
4778 OP2(SLJIT_SHL
, TMP2
, 0, SLJIT_IMM
, 1, TMP2
, 0);
4779 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, TMP2
, 0);
4780 found
= JUMP(SLJIT_NOT_ZERO
);
/* No match: reload the character and move on by one code unit. */
4785 OP1(SLJIT_MOV
, TMP1
, 0, TMP3
, 0);
4787 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* UTF-8: for lead bytes >= 0xc0 also skip the count read from utf8_table4. */
4789 #if defined COMPILE_PCRE8
4792 CMPTO(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xc0, start
);
4793 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
4794 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
/* UTF-16: when (c & 0xfc00) == 0xd800, skip one further code unit
(flag result shifted left by 1, i.e. two bytes). */
4796 #elif defined COMPILE_PCRE16
4799 CMPTO(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xd800, start
);
4800 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xfc00);
4801 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0xd800);
4802 OP_FLAGS(SLJIT_MOV
, TMP1
, 0, SLJIT_EQUAL
);
4803 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
4804 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
4806 #endif /* COMPILE_PCRE[8|16] */
4807 #endif /* SUPPORT_UTF */
4808 JUMPTO(SLJIT_JUMP
, start
);
/* Jumps collected by check_class_ranges() land here. */
4811 if (matches
!= NULL
)
4812 set_jumps(matches
, LABEL());
4815 if (common
->match_end_ptr
!= 0)
4816 OP1(SLJIT_MOV
, STR_END
, 0, RETURN_ADDR
, 0);
/* Emits a forward scan for req_char (and its other-case form) between the
current position and STR_END. The position reached is stored in the
req_char_ptr stack slot, and the scan is bypassed when STR_PTR is still
below the value already stored there.
NOTE(review): the return statement, the loop label definition and the
conditionals that use `caseless` and `has_firstchar` are not visible in
this extract. */
4819 static SLJIT_INLINE
struct sljit_jump
*search_requested_char(compiler_common
*common
, pcre_uchar req_char
, BOOL caseless
, BOOL has_firstchar
)
4822 struct sljit_label
*loop
;
4823 struct sljit_jump
*toolong
;
4824 struct sljit_jump
*alreadyfound
;
4825 struct sljit_jump
*found
;
4826 struct sljit_jump
*foundoc
= NULL
;
4827 struct sljit_jump
*notfound
;
4830 SLJIT_ASSERT(common
->req_char_ptr
!= 0);
/* TMP2 = cached position; toolong is taken when STR_PTR + REQ_BYTE_MAX is
still below STR_END, alreadyfound when STR_PTR is below the cached one. */
4831 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->req_char_ptr
);
4832 OP2(SLJIT_ADD
, TMP1
, 0, STR_PTR
, 0, SLJIT_IMM
, REQ_BYTE_MAX
);
4833 toolong
= CMP(SLJIT_LESS
, TMP1
, 0, STR_END
, 0);
4834 alreadyfound
= CMP(SLJIT_LESS
, STR_PTR
, 0, TMP2
, 0);
/* TMP1 is the scan pointer: it starts one character after STR_PTR or at
STR_PTR itself (the selecting condition is outside this extract). */
4837 OP2(SLJIT_ADD
, TMP1
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
4839 OP1(SLJIT_MOV
, TMP1
, 0, STR_PTR
, 0);
4842 notfound
= CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, STR_END
, 0);
4844 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(TMP1
), 0);
/* Other-case form of req_char, from the fcc table or the Unicode data. */
4848 oc
= TABLE_GET(req_char
, common
->fcc
, req_char
);
4849 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4850 if (req_char
> 127 && common
->utf
)
4851 oc
= UCD_OTHERCASE(req_char
);
4855 found
= CMP(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, req_char
);
/* If both cases differ in exactly one bit, OR that bit in and use a
single compare; otherwise compare against each form separately. */
4858 bit
= req_char
^ oc
;
4859 if (is_powerof2(bit
))
4861 OP2(SLJIT_OR
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, bit
);
4862 found
= CMP(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, req_char
| bit
);
4866 found
= CMP(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, req_char
);
4867 foundoc
= CMP(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, oc
);
4870 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, IN_UCHARS(1));
4871 JUMPTO(SLJIT_JUMP
, loop
);
/* Remember where the scan stopped. */
4876 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->req_char_ptr
, TMP1
, 0);
4877 JUMPHERE(alreadyfound
);
/* Emits a fast-call helper that walks the records stored below STACK_TOP
and writes their saved values back relative to the local base held in
TMP1. The original STACK_TOP is kept in TMP3 and restored before the
helper returns.
NOTE(review): label definitions and JUMPHERE calls are missing from this
extract; record layout is described only as far as the loads show it. */
4882 static void do_revertframes(compiler_common
*common
)
4885 struct sljit_jump
*jump
;
4886 struct sljit_label
*mainloop
;
4888 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
4889 OP1(SLJIT_MOV
, TMP3
, 0, STACK_TOP
, 0);
4890 GET_LOCAL_BASE(TMP1
, 0, 0);
4892 /* Drop frames until we reach STACK_TOP. */
/* TMP2 = word just below STACK_TOP; its sign selects the record type. */
4894 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), -sizeof(sljit_sw
));
4895 jump
= CMP(SLJIT_SIG_LESS_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0);
/* Positive value: an offset from the local base; two saved words are
copied back and three words are dropped from the stack. */
4897 OP2(SLJIT_ADD
, TMP2
, 0, TMP2
, 0, TMP1
, 0);
4898 OP1(SLJIT_MOV
, SLJIT_MEM1(TMP2
), 0, SLJIT_MEM1(STACK_TOP
), -2 * sizeof(sljit_sw
));
4899 OP1(SLJIT_MOV
, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
), SLJIT_MEM1(STACK_TOP
), -3 * sizeof(sljit_sw
));
4900 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, 3 * sizeof(sljit_sw
));
4901 JUMPTO(SLJIT_JUMP
, mainloop
);
/* Zero falls through to the return below; the remaining (negative)
values are handled after it. */
4904 jump
= CMP(SLJIT_NOT_ZERO
/* SIG_LESS */, TMP2
, 0, SLJIT_IMM
, 0);
4905 /* End of reverting values. */
4906 OP1(SLJIT_MOV
, STACK_TOP
, 0, TMP3
, 0);
4907 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Negative value: negate to get the offset; one saved word is copied
back and two words are dropped from the stack. */
4910 OP1(SLJIT_NEG
, TMP2
, 0, TMP2
, 0);
4911 OP2(SLJIT_ADD
, TMP2
, 0, TMP2
, 0, TMP1
, 0);
4912 OP1(SLJIT_MOV
, SLJIT_MEM1(TMP2
), 0, SLJIT_MEM1(STACK_TOP
), -2 * sizeof(sljit_sw
));
4913 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, 2 * sizeof(sljit_sw
));
4914 JUMPTO(SLJIT_JUMP
, mainloop
);
/* Emits a fast-call helper for word boundary tests. The "is a word
character" value of the character before STR_PTR is stored in LOCALS1, the
value for the character at STR_PTR is computed in TMP2, and the two are
XOR-ed with SLJIT_SET_Z so the caller can branch on the zero flag. The
return address is kept in LOCALS0.
NOTE(review): braces, else branches and JUMPHERE calls are missing from
this extract. */
4917 static void check_wordboundary(compiler_common
*common
)
4920 struct sljit_jump
*skipread
;
4921 jump_list
*skipread_list
= NULL
;
4922 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4923 struct sljit_jump
*jump
;
/* The LSHR by 4 further down relies on this bit position. */
4926 SLJIT_COMPILE_ASSERT(ctype_word
== 0x10, ctype_word_must_be_16
);
4928 sljit_emit_fast_enter(compiler
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
4929 /* Get type of the previous char, and put it to LOCALS1. */
4930 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
4931 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, begin
));
/* LOCALS1 defaults to 0; it stays 0 when STR_PTR is at or before begin. */
4932 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS1
, SLJIT_IMM
, 0);
4933 skipread
= CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, TMP1
, 0);
4934 skip_char_back(common
);
4935 check_start_used_ptr(common
);
4938 /* Testing char type. */
/* Unicode property mode: underscore is compared first with TMP2 preset
to 1; otherwise the value returned by the getucd helper is checked
against the ucp_Ll..ucp_Lu and ucp_Nd..ucp_No ranges. */
4940 if (common
->use_ucp
)
4942 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, 1);
4943 jump
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_UNDERSCORE
);
4944 add_jump(compiler
, &common
->getucd
, JUMP(SLJIT_FAST_CALL
));
4945 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ucp_Ll
);
4946 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, ucp_Lu
- ucp_Ll
);
4947 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
4948 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ucp_Nd
- ucp_Ll
);
4949 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, ucp_No
- ucp_Nd
);
4950 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_LESS_EQUAL
);
4952 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS1
, TMP2
, 0);
/* Table mode: characters above 255 bypass the ctypes lookup. */
4957 #ifndef COMPILE_PCRE8
4958 jump
= CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, 255);
4959 #elif defined SUPPORT_UTF
4960 /* Here LOCALS1 has already been zeroed. */
4963 jump
= CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, 255);
4964 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit (bit 4) of the ctypes entry as 0 or 1. */
4965 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), common
->ctypes
);
4966 OP2(SLJIT_LSHR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 4 /* ctype_word */);
4967 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
4968 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS1
, TMP1
, 0);
4969 #ifndef COMPILE_PCRE8
4971 #elif defined SUPPORT_UTF
4974 #endif /* COMPILE_PCRE8 */
/* Second half: the character at STR_PTR. TMP2 defaults to 0, which is
the value used when check_str_end() jumps to skipread_list. */
4978 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, 0);
4979 check_str_end(common
, &skipread_list
);
4980 peek_char(common
, READ_CHAR_MAX
);
4982 /* Testing char type. This is a code duplication. */
4984 if (common
->use_ucp
)
4986 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, 1);
4987 jump
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_UNDERSCORE
);
4988 add_jump(compiler
, &common
->getucd
, JUMP(SLJIT_FAST_CALL
));
4989 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ucp_Ll
);
4990 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, ucp_Lu
- ucp_Ll
);
4991 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
4992 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ucp_Nd
- ucp_Ll
);
4993 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, ucp_No
- ucp_Nd
);
4994 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_LESS_EQUAL
);
5000 #ifndef COMPILE_PCRE8
5001 /* TMP2 may be destroyed by peek_char. */
5002 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, 0);
5003 jump
= CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, 255);
5004 #elif defined SUPPORT_UTF
5005 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, 0);
5008 jump
= CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, 255);
5010 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP1
), common
->ctypes
);
5011 OP2(SLJIT_LSHR
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 4 /* ctype_word */);
5012 OP2(SLJIT_AND
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 1);
5013 #ifndef COMPILE_PCRE8
5015 #elif defined SUPPORT_UTF
5018 #endif /* COMPILE_PCRE8 */
5020 set_jumps(skipread_list
, LABEL());
/* Zero flag is set when both sides have the same word-ness. */
5022 OP2(SLJIT_XOR
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS1
);
5023 sljit_emit_fast_return(compiler
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
/* Tries to express a 256 bit character class bitmap as at most four range
boundaries and, if that works, emits compare-and-jump code on TMP1 whose
jumps are added to *backtracks.
  bits       - the class bitmap, one bit per character 0..255
  nclass     - together with the value of `bit`, decides whether a closing
               boundary of 256 is appended
  invert     - flips the sense of `bit` before code is emitted
  backtracks - jump list that receives the emitted jumps
NOTE(review): return statements, braces and the switch over `length` are
missing from this extract, so the returned values cannot be read here. */
5026 static BOOL
check_class_ranges(compiler_common
*common
, const sljit_u8
*bits
, BOOL nclass
, BOOL invert
, jump_list
**backtracks
)
5028 /* May destroy TMP1. */
5030 int ranges
[MAX_RANGE_SIZE
];
5031 sljit_u8 bit
, cbit
, all
;
5032 int i
, byte
, length
= 0;
/* Start from the state of character 0 and record every position where
the bit value changes. */
5034 bit
= bits
[0] & 0x1;
5035 /* All bits will be zero or one (since bit is zero or one). */
5038 for (i
= 0; i
< 256; )
/* On a byte boundary a whole byte equal to `all` is tested in one step. */
5041 if ((i
& 0x7) == 0 && bits
[byte
] == all
)
5045 cbit
= (bits
[byte
] >> (i
& 0x7)) & 0x1;
5048 if (length
>= MAX_RANGE_SIZE
)
5059 if (((bit
== 0) && nclass
) || ((bit
== 1) && !nclass
))
5061 if (length
>= MAX_RANGE_SIZE
)
5063 ranges
[length
] = 256;
/* Only up to four boundaries are handled by the code below. */
5067 if (length
< 0 || length
> 4)
5070 bit
= bits
[0] & 0x1;
5071 if (invert
) bit
^= 0x1;
5073 /* No character is accepted. */
5074 if (length
== 0 && bit
== 0)
5075 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
5080 /* When bit != 0, all characters are accepted. */
/* One boundary: a single unsigned compare against ranges[0]. */
5084 add_jump(compiler
, backtracks
, CMP(bit
== 0 ? SLJIT_LESS
: SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[0]));
/* Two boundaries: subtract ranges[0] and compare with the range width,
or use an equality test when the range holds a single character. */
5088 if (ranges
[0] + 1 != ranges
[1])
5090 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[0]);
5091 add_jump(compiler
, backtracks
, CMP(bit
!= 0 ? SLJIT_LESS
: SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[1] - ranges
[0]));
5094 add_jump(compiler
, backtracks
, CMP(bit
!= 0 ? SLJIT_EQUAL
: SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[0]));
/* Three boundaries: two variants follow (the condition that selects
between them is not visible in this extract). */
5100 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[2]));
5101 if (ranges
[0] + 1 != ranges
[1])
5103 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[0]);
5104 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, ranges
[1] - ranges
[0]));
5107 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[0]));
5111 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, ranges
[0]));
5112 if (ranges
[1] + 1 != ranges
[2])
5114 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[1]);
5115 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, ranges
[2] - ranges
[1]));
5118 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[1]));
/* Four boundaries, special case: both ranges have the same width and
their start values differ in exactly one bit. That bit is OR-ed into
TMP1 so a single range test covers both ranges. */
5122 if ((ranges
[1] - ranges
[0]) == (ranges
[3] - ranges
[2])
5123 && (ranges
[0] | (ranges
[2] - ranges
[0])) == ranges
[2]
5124 && (ranges
[1] & (ranges
[2] - ranges
[0])) == 0
5125 && is_powerof2(ranges
[2] - ranges
[0]))
5127 SLJIT_ASSERT((ranges
[0] & (ranges
[2] - ranges
[0])) == 0 && (ranges
[2] & ranges
[3] & (ranges
[2] - ranges
[0])) != 0);
5128 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[2] - ranges
[0]);
5129 if (ranges
[2] + 1 != ranges
[3])
5131 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[2]);
5132 add_jump(compiler
, backtracks
, CMP(bit
!= 0 ? SLJIT_LESS
: SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[3] - ranges
[2]));
5135 add_jump(compiler
, backtracks
, CMP(bit
!= 0 ? SLJIT_EQUAL
: SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[2]));
/* Four boundaries, general case: each range is tested on its own. `i`
compensates for an earlier subtraction from TMP1 (its assignment is not
visible in this extract). */
5142 if (ranges
[0] + 1 != ranges
[1])
5144 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[0]);
5145 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, ranges
[1] - ranges
[0]));
5149 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[0]));
5151 if (ranges
[2] + 1 != ranges
[3])
5153 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[2] - i
);
5154 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, ranges
[3] - ranges
[2]));
5157 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[2] - i
));
/* Alternative four boundary form: test the outer span ranges[0]..ranges[3]
first, then the inner span ranges[1]..ranges[2]. */
5161 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[0]);
5162 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[3] - ranges
[0]));
5163 if (ranges
[1] + 1 != ranges
[2])
5165 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, ranges
[1] - ranges
[0]);
5166 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, ranges
[2] - ranges
[1]));
5169 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, ranges
[1] - ranges
[0]));
5173 SLJIT_UNREACHABLE();
/* Emits a fast-call helper that tests TMP1 against the newline code points
0x0a..0x0d and 0x85 and, on UTF / 16 bit / 32 bit builds, 0x2028 and 0x2029
(both handled by OR-ing bit 0 before one compare). The result is
accumulated in TMP2 and the final OP_FLAGS also sets the zero flag.
NOTE(review): the lines inside the COMPILE_PCRE8 conditionals are not
visible in this extract. */
5178 static void check_anynewline(compiler_common
*common
)
5180 /* Check whether TMP1 contains a newline character. TMP1 is modified and TMP2 is destroyed. */
5183 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
/* Rebase TMP1 to 0x0a so the 0x0a..0x0d range is one unsigned compare. */
5185 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x0a);
5186 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x0d - 0x0a);
5187 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
5188 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x85 - 0x0a);
5189 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5190 #ifdef COMPILE_PCRE8
5194 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
/* Setting bit 0 maps rebased 0x2028 onto rebased 0x2029. */
5195 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x1);
5196 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x2029 - 0x0a);
5197 #ifdef COMPILE_PCRE8
5200 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5201 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_EQUAL
);
5202 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Emits a fast-call helper that tests TMP1 against the horizontal space
code points 0x09, 0x20 and 0xa0 and, on UTF / 16 bit / 32 bit builds,
0x1680, 0x180e, 0x2000..0x200A, 0x202f, 0x205f and 0x3000. The result is
accumulated in TMP2 and the final OP_FLAGS also sets the zero flag.
NOTE(review): the lines inside the COMPILE_PCRE8 conditionals are not
visible in this extract. */
5205 static void check_hspace(compiler_common
*common
)
5207 /* Check whether TMP1 contains a horizontal space character. TMP1 may be modified and TMP2 is destroyed. */
5210 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
5212 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x09);
5213 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
5214 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x20);
5215 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
5216 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0xa0);
5217 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5218 #ifdef COMPILE_PCRE8
5222 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
5223 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x1680);
5224 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
5225 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x180e);
5226 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
/* From here on TMP1 is rebased to 0x2000, so the constants below are
written as differences from 0x2000. */
5227 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x2000);
5228 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x200A - 0x2000);
5229 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_LESS_EQUAL
);
5230 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x202f - 0x2000);
5231 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
5232 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x205f - 0x2000);
5233 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
5234 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x3000 - 0x2000);
5235 #ifdef COMPILE_PCRE8
5238 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5239 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_EQUAL
);
5241 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Emits a fast-call helper that tests TMP1 against the vertical space code
points 0x0a..0x0d and 0x85 and, on UTF / 16 bit / 32 bit builds, 0x2028
and 0x2029. The visible instruction sequence is the same as the one in
check_anynewline(). The result is accumulated in TMP2 and the final
OP_FLAGS also sets the zero flag.
NOTE(review): the lines inside the COMPILE_PCRE8 conditionals are not
visible in this extract. */
5244 static void check_vspace(compiler_common
*common
)
5246 /* Check whether TMP1 contains a vertical space character. TMP1 is modified and TMP2 is destroyed. */
5249 sljit_emit_fast_enter(compiler
, RETURN_ADDR
, 0);
/* Rebase TMP1 to 0x0a so the 0x0a..0x0d range is one unsigned compare. */
5251 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x0a);
5252 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x0d - 0x0a);
5253 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
5254 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x85 - 0x0a);
5255 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5256 #ifdef COMPILE_PCRE8
5260 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
/* Setting bit 0 maps rebased 0x2028 onto rebased 0x2029. */
5261 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0x1);
5262 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x2029 - 0x0a);
5263 #ifdef COMPILE_PCRE8
5266 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5267 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_EQUAL
);
5269 sljit_emit_fast_return(compiler
, RETURN_ADDR
, 0);
/* Emits a fast-call helper that compares two character sequences exactly.
As emitted: TMP1 points at the first sequence, TMP2 holds the length in
bytes (it is decremented by IN_UCHARS(1) per step), and STR_PTR is first
moved back by TMP2 to the start of the second sequence. Three loop shapes
are generated depending on whether sljit supports post-update or
pre-update addressing for MOV_UCHAR, with a plain load/add loop as the
fallback. The return address is kept in LOCALS0.
NOTE(review): braces, else branches, label definitions and JUMPHERE calls
are missing from this extract. */
5272 static void do_casefulcmp(compiler_common
*common
)
5275 struct sljit_jump
*jump
;
5276 struct sljit_label
*label
;
/* If TMP3 is not backed by a real register, STR_END and STACK_TOP are
borrowed as character registers and saved/restored around the loop. */
5280 if (sljit_get_register_index(TMP3
) < 0)
5282 char1_reg
= STR_END
;
5283 char2_reg
= STACK_TOP
;
5288 char2_reg
= RETURN_ADDR
;
5291 sljit_emit_fast_enter(compiler
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
5292 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
5294 if (char1_reg
== STR_END
)
5296 OP1(SLJIT_MOV
, TMP3
, 0, char1_reg
, 0);
5297 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, char2_reg
, 0);
/* Variant 1: post-update loads (SLJIT_MEM_SUPP only probes for support). */
5300 if (sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_SUPP
| SLJIT_MEM_POST
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1)) == SLJIT_SUCCESS
)
5303 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_POST
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1));
5304 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_POST
, char2_reg
, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
5305 jump
= CMP(SLJIT_NOT_EQUAL
, char1_reg
, 0, char2_reg
, 0);
5306 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, IN_UCHARS(1));
5307 JUMPTO(SLJIT_NOT_ZERO
, label
);
5310 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
/* Variant 2: pre-update loads; both pointers start one character early
and STR_PTR is corrected by one character after the loop. */
5312 else if (sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_SUPP
| SLJIT_MEM_PRE
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1)) == SLJIT_SUCCESS
)
5314 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, IN_UCHARS(1));
5315 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
5318 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_PRE
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1));
5319 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_PRE
, char2_reg
, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
5320 jump
= CMP(SLJIT_NOT_EQUAL
, char1_reg
, 0, char2_reg
, 0);
5321 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, IN_UCHARS(1));
5322 JUMPTO(SLJIT_NOT_ZERO
, label
);
5325 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
5326 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* Variant 3: plain loads followed by explicit pointer increments. */
5331 OP1(MOV_UCHAR
, char1_reg
, 0, SLJIT_MEM1(TMP1
), 0);
5332 OP1(MOV_UCHAR
, char2_reg
, 0, SLJIT_MEM1(STR_PTR
), 0);
5333 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, IN_UCHARS(1));
5334 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
5335 jump
= CMP(SLJIT_NOT_EQUAL
, char1_reg
, 0, char2_reg
, 0);
5336 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, IN_UCHARS(1));
5337 JUMPTO(SLJIT_NOT_ZERO
, label
);
5340 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
/* Give the borrowed registers their original contents back. */
5343 if (char1_reg
== STR_END
)
5345 OP1(SLJIT_MOV
, char1_reg
, 0, TMP3
, 0);
5346 OP1(SLJIT_MOV
, char2_reg
, 0, RETURN_ADDR
, 0);
/* TMP1 now holds the return address loaded from LOCALS0. */
5349 sljit_emit_fast_return(compiler
, TMP1
, 0);
/* Emits a fast-call helper that compares two character sequences after
mapping each character through the common->lcc table; on non 8 bit builds
characters above 255 bypass the table. Register use matches the visible
code of do_casefulcmp(): TMP1 and STR_PTR are the two pointers, TMP2 is the
remaining length in bytes, LOCALS0 holds the return address. STR_END is
always borrowed as char1_reg and is saved in LOCALS1.
NOTE(review): braces, else branches, label definitions, JUMPHERE calls and
the assignments to opt_type are missing from this extract. */
5352 static void do_caselesscmp(compiler_common
*common
)
5355 struct sljit_jump
*jump
;
5356 struct sljit_label
*label
;
5357 int char1_reg
= STR_END
;
/* If TMP3 is not backed by a real register, STACK_TOP and STACK_LIMIT are
borrowed as well and saved in TMP3 / RETURN_ADDR around the loop. */
5362 if (sljit_get_register_index(TMP3
) < 0)
5364 char2_reg
= STACK_TOP
;
5365 lcc_table
= STACK_LIMIT
;
5369 char2_reg
= RETURN_ADDR
;
/* Probe which addressing form is available (SLJIT_MEM_SUPP emits nothing). */
5373 if (sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_SUPP
| SLJIT_MEM_POST
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1)) == SLJIT_SUCCESS
)
5375 else if (sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_SUPP
| SLJIT_MEM_PRE
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1)) == SLJIT_SUCCESS
)
5378 sljit_emit_fast_enter(compiler
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
5379 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
5381 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS1
, char1_reg
, 0);
5383 if (char2_reg
== STACK_TOP
)
5385 OP1(SLJIT_MOV
, TMP3
, 0, char2_reg
, 0);
5386 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, lcc_table
, 0);
/* Base address of the lower-case table. */
5389 OP1(SLJIT_MOV
, lcc_table
, 0, SLJIT_IMM
, common
->lcc
);
/* Post-update loads. */
5394 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_POST
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1));
5395 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_POST
, char2_reg
, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
/* Pre-update loads: both pointers start one character early. */
5397 else if (opt_type
== 2)
5399 OP2(SLJIT_SUB
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, IN_UCHARS(1));
5400 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
5403 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_PRE
, char1_reg
, SLJIT_MEM1(TMP1
), IN_UCHARS(1));
5404 sljit_emit_mem(compiler
, MOV_UCHAR
| SLJIT_MEM_PRE
, char2_reg
, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
/* Plain loads; TMP1 is incremented here, STR_PTR further below. */
5409 OP1(MOV_UCHAR
, char1_reg
, 0, SLJIT_MEM1(TMP1
), 0);
5410 OP1(MOV_UCHAR
, char2_reg
, 0, SLJIT_MEM1(STR_PTR
), 0);
5411 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* Map both characters through the table (skipped above 255 on wide builds). */
5414 #ifndef COMPILE_PCRE8
5415 jump
= CMP(SLJIT_GREATER
, char1_reg
, 0, SLJIT_IMM
, 255);
5417 OP1(SLJIT_MOV_U8
, char1_reg
, 0, SLJIT_MEM2(lcc_table
, char1_reg
), 0);
5418 #ifndef COMPILE_PCRE8
5420 jump
= CMP(SLJIT_GREATER
, char2_reg
, 0, SLJIT_IMM
, 255);
5422 OP1(SLJIT_MOV_U8
, char2_reg
, 0, SLJIT_MEM2(lcc_table
, char2_reg
), 0);
5423 #ifndef COMPILE_PCRE8
5428 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
5430 jump
= CMP(SLJIT_NOT_EQUAL
, char1_reg
, 0, char2_reg
, 0);
5431 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, IN_UCHARS(1));
5432 JUMPTO(SLJIT_NOT_ZERO
, label
);
5435 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
5438 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
/* Give the borrowed registers their original contents back. */
5440 if (char2_reg
== STACK_TOP
)
5442 OP1(SLJIT_MOV
, char2_reg
, 0, TMP3
, 0);
5443 OP1(SLJIT_MOV
, lcc_table
, 0, RETURN_ADDR
, 0);
5446 OP1(SLJIT_MOV
, char1_reg
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS1
);
/* TMP1 now holds the return address loaded from LOCALS0. */
5447 sljit_emit_fast_return(compiler
, TMP1
, 0);
5450 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* C helper that compares two UTF encoded sequences caselessly using the
Unicode character database. Characters are decoded with GETCHARINC from
src1 and src2; a pair matches when the code points are equal, when c1
equals c2 plus its other_case delta, or when c1 is found in c2's caseless
set.
NOTE(review): the loop header, the bounds checks against end1 / end2, the
assignment of `ur` and the final return are missing from this extract; only
the `(pcre_uchar *)1` and NULL returns are visible. */
5452 static const pcre_uchar
* SLJIT_FUNC
do_utf_caselesscmp(pcre_uchar
*src1
, pcre_uchar
*src2
, pcre_uchar
*end1
, pcre_uchar
*end2
)
5454 /* Doing this at the JIT level would not be worthwhile, so it is a C function. */
5456 const ucd_record
*ur
;
5457 const sljit_u32
*pp
;
5462 return (pcre_uchar
*)1;
5463 GETCHARINC(c1
, src1
);
5464 GETCHARINC(c2
, src2
);
5466 if (c1
!= c2
&& c1
!= c2
+ ur
->other_case
)
/* Walk c2's caseless set: NULL is returned as soon as an entry larger
than c1 is seen, i.e. c1 is not a member. */
5468 pp
= PRIV(ucd_caseless_sets
) + ur
->caseset
;
5471 if (c1
< *pp
) return NULL
;
5472 if (c1
== *pp
++) break;
5479 #endif /* SUPPORT_UTF && SUPPORT_UCP */
/* Emits the comparison of the pattern character at cc against the subject
data located at STR_PTR - context->length.

When caseless is set and the character has another case, the case difference
is reduced to a single bit (othercasebit) located in the code unit
othercasechar; the subject unit is then OR-ed with that bit and compared with
(*cc | othercasebit). When unaligned reads are available (8 and 16 bit
builds) several code units are accumulated in context->c / context->oc and
compared as one 4, 2 or 1 byte value; otherwise each code unit is loaded and
compared on its own. context->sourcereg alternates between TMP1 and TMP2.
Every mismatch jump is added to backtracks.

NOTE(review): the return statement and the loop header are not visible in
this listing. */
5481 static pcre_uchar
*byte_sequence_compare(compiler_common
*common
, BOOL caseless
, pcre_uchar
*cc
,
5482 compare_context
*context
, jump_list
**backtracks
)
5485 unsigned int othercasebit
= 0;
5486 pcre_uchar
*othercasechar
= NULL
;
5491 if (caseless
&& char_has_othercase(common
, cc
))
5493 othercasebit
= char_get_othercase_bit(common
, cc
);
5494 SLJIT_ASSERT(othercasebit
);
5495 /* Extracting bit difference info: the upper bits of othercasebit select
the code unit that differs, the low 8 bits hold the differing bit. */
5496 #if defined COMPILE_PCRE8
5497 othercasechar
= cc
+ (othercasebit
>> 8);
5498 othercasebit
&= 0xff;
5499 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5500 /* Note that this code only handles characters in the BMP. If there
5501 ever are characters outside the BMP whose othercase differs in only one
5502 bit from itself (there currently are none), this code will need to be
5503 revised for COMPILE_PCRE32. */
5504 othercasechar
= cc
+ (othercasebit
>> 9);
5505 if ((othercasebit
& 0x100) != 0)
5506 othercasebit
= (othercasebit
& 0xff) << 8;
5508 othercasebit
&= 0xff;
5509 #endif /* COMPILE_PCRE[8|16|32] */
/* sourcereg == -1: nothing has been loaded for this context yet (presumably
the first character of the sequence), so preload the first chunk into TMP1. */
5512 if (context
->sourcereg
== -1)
5514 #if defined COMPILE_PCRE8
5515 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5516 if (context
->length
>= 4)
5517 OP1(SLJIT_MOV_S32
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5518 else if (context
->length
>= 2)
5519 OP1(SLJIT_MOV_U16
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5522 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5523 #elif defined COMPILE_PCRE16
5524 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5525 if (context
->length
>= 4)
5526 OP1(SLJIT_MOV_S32
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5529 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5530 #elif defined COMPILE_PCRE32
5531 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5532 #endif /* COMPILE_PCRE[8|16|32] */
5533 context
->sourcereg
= TMP2
;
/* In UTF mode a character may span several code units; utflength counts them. */
5538 if (common
->utf
&& HAS_EXTRALEN(*cc
))
5539 utflength
+= GET_EXTRALEN(*cc
);
5545 context
->length
-= IN_UCHARS(1);
5546 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5548 /* Unaligned read is supported. */
5549 if (othercasebit
!= 0 && othercasechar
== cc
)
5551 context
->c
.asuchars
[context
->ucharptr
] = *cc
| othercasebit
;
5552 context
->oc
.asuchars
[context
->ucharptr
] = othercasebit
;
5556 context
->c
.asuchars
[context
->ucharptr
] = *cc
;
5557 context
->oc
.asuchars
[context
->ucharptr
] = 0;
5559 context
->ucharptr
++;
5561 #if defined COMPILE_PCRE8
5562 if (context
->ucharptr
>= 4 || context
->length
== 0 || (context
->ucharptr
== 2 && context
->length
== 1))
5564 if (context
->ucharptr
>= 2 || context
->length
== 0)
/* Start loading the next chunk into the current register before the
comparison of the previously loaded one is emitted. */
5567 if (context
->length
>= 4)
5568 OP1(SLJIT_MOV_S32
, context
->sourcereg
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5569 else if (context
->length
>= 2)
5570 OP1(SLJIT_MOV_U16
, context
->sourcereg
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5571 #if defined COMPILE_PCRE8
5572 else if (context
->length
>= 1)
5573 OP1(SLJIT_MOV_U8
, context
->sourcereg
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5574 #endif /* COMPILE_PCRE8 */
5575 context
->sourcereg
= context
->sourcereg
== TMP1
? TMP2
: TMP1
;
/* Compare the accumulated code units as one 4, 2 or 1 byte value; the oc
member holds the case bits that must be forced on before comparing. */
5577 switch(context
->ucharptr
)
5579 case 4 / sizeof(pcre_uchar
):
5580 if (context
->oc
.asint
!= 0)
5581 OP2(SLJIT_OR
, context
->sourcereg
, 0, context
->sourcereg
, 0, SLJIT_IMM
, context
->oc
.asint
);
5582 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, context
->sourcereg
, 0, SLJIT_IMM
, context
->c
.asint
| context
->oc
.asint
));
5585 case 2 / sizeof(pcre_uchar
):
5586 if (context
->oc
.asushort
!= 0)
5587 OP2(SLJIT_OR
, context
->sourcereg
, 0, context
->sourcereg
, 0, SLJIT_IMM
, context
->oc
.asushort
);
5588 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, context
->sourcereg
, 0, SLJIT_IMM
, context
->c
.asushort
| context
->oc
.asushort
));
5591 #ifdef COMPILE_PCRE8
5593 if (context
->oc
.asbyte
!= 0)
5594 OP2(SLJIT_OR
, context
->sourcereg
, 0, context
->sourcereg
, 0, SLJIT_IMM
, context
->oc
.asbyte
);
5595 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, context
->sourcereg
, 0, SLJIT_IMM
, context
->c
.asbyte
| context
->oc
.asbyte
));
5600 SLJIT_UNREACHABLE();
5603 context
->ucharptr
= 0;
5608 /* Unaligned read is unsupported or in 32 bit mode. */
5609 if (context
->length
>= 1)
5610 OP1(MOV_UCHAR
, context
->sourcereg
, 0, SLJIT_MEM1(STR_PTR
), -context
->length
);
5612 context
->sourcereg
= context
->sourcereg
== TMP1
? TMP2
: TMP1
;
5614 if (othercasebit
!= 0 && othercasechar
== cc
)
5616 OP2(SLJIT_OR
, context
->sourcereg
, 0, context
->sourcereg
, 0, SLJIT_IMM
, othercasebit
);
5617 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, context
->sourcereg
, 0, SLJIT_IMM
, *cc
| othercasebit
));
5620 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, context
->sourcereg
, 0, SLJIT_IMM
, *cc
));
5628 while (utflength
> 0);
5634 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Rebasing helpers used while generating extended class checks.

SET_TYPE_OFFSET(value) makes typereg hold (type - value): the register is
adjusted by the difference between the current typeoffset and the requested
one (ADD when the offset is lowered, SUB when it is raised) and typeoffset is
updated. SET_CHAR_OFFSET(value) does the same for the character kept in TMP1
and the charoffset variable. Both expect typereg / typeoffset / charoffset to
be locals of the enclosing function and emit nothing when the offset is
already the requested one. */
5636 #define SET_TYPE_OFFSET(value) \
5637 if ((value) != typeoffset) \
5639 if ((value) < typeoffset) \
5640 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
5642 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
5644 typeoffset = (value);
5646 #define SET_CHAR_OFFSET(value) \
5647 if ((value) != charoffset) \
5649 if ((value) < charoffset) \
5650 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
5652 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
5654 charoffset = (value);
5656 static pcre_uchar
*compile_char1_matchingpath(compiler_common
*common
, pcre_uchar type
, pcre_uchar
*cc
, jump_list
**backtracks
, BOOL check_str_ptr
);
5658 static void compile_xclass_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, jump_list
**backtracks
)
5661 jump_list
*found
= NULL
;
5662 jump_list
**list
= (cc
[0] & XCL_NOT
) == 0 ? &found
: backtracks
;
5663 sljit_uw c
, charoffset
, max
= 256, min
= READ_CHAR_MAX
;
5664 struct sljit_jump
*jump
= NULL
;
5665 pcre_uchar
*ccbegin
;
5666 int compares
, invertcmp
, numberofcmps
;
5667 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5668 BOOL utf
= common
->utf
;
5672 BOOL needstype
= FALSE
, needsscript
= FALSE
, needschar
= FALSE
;
5673 BOOL charsaved
= FALSE
;
5675 const sljit_u32
*other_cases
;
5676 sljit_uw typeoffset
;
5679 /* Scanning the necessary info. */
5683 if (cc
[-1] & XCL_MAP
)
5686 cc
+= 32 / sizeof(pcre_uchar
);
5689 while (*cc
!= XCL_END
)
5692 if (*cc
== XCL_SINGLE
)
5695 GETCHARINCTEST(c
, cc
);
5696 if (c
> max
) max
= c
;
5697 if (c
< min
) min
= c
;
5702 else if (*cc
== XCL_RANGE
)
5705 GETCHARINCTEST(c
, cc
);
5706 if (c
< min
) min
= c
;
5707 GETCHARINCTEST(c
, cc
);
5708 if (c
> max
) max
= c
;
5716 SLJIT_ASSERT(*cc
== XCL_PROP
|| *cc
== XCL_NOTPROP
);
5718 if (*cc
== PT_CLIST
)
5720 other_cases
= PRIV(ucd_caseless_sets
) + cc
[1];
5721 while (*other_cases
!= NOTACHAR
)
5723 if (*other_cases
> max
) max
= *other_cases
;
5724 if (*other_cases
< min
) min
= *other_cases
;
5730 max
= READ_CHAR_MAX
;
5737 /* Any either accepts everything or ignored. */
5738 if (cc
[-1] == XCL_PROP
)
5740 compile_char1_matchingpath(common
, OP_ALLANY
, cc
, backtracks
, FALSE
);
5741 if (list
== backtracks
)
5742 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
5774 SLJIT_UNREACHABLE();
5781 SLJIT_ASSERT(compares
> 0);
5783 /* We are not necessary in utf mode even in 8 bit mode. */
5785 read_char_range(common
, min
, max
, (cc
[-1] & XCL_NOT
) != 0);
5787 if ((cc
[-1] & XCL_HASPROP
) == 0)
5789 if ((cc
[-1] & XCL_MAP
) != 0)
5791 jump
= CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, 255);
5792 if (!check_class_ranges(common
, (const sljit_u8
*)cc
, (((const sljit_u8
*)cc
)[31] & 0x80) != 0, TRUE
, &found
))
5794 OP2(SLJIT_AND
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0x7);
5795 OP2(SLJIT_LSHR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 3);
5796 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)cc
);
5797 OP2(SLJIT_SHL
, TMP2
, 0, SLJIT_IMM
, 1, TMP2
, 0);
5798 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, TMP2
, 0);
5799 add_jump(compiler
, &found
, JUMP(SLJIT_NOT_ZERO
));
5802 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
5805 cc
+= 32 / sizeof(pcre_uchar
);
5809 OP2(SLJIT_SUB
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, min
);
5810 add_jump(compiler
, (cc
[-1] & XCL_NOT
) == 0 ? backtracks
: &found
, CMP(SLJIT_GREATER
, TMP2
, 0, SLJIT_IMM
, max
- min
));
5813 else if ((cc
[-1] & XCL_MAP
) != 0)
5815 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, TMP1
, 0);
5819 if (!check_class_ranges(common
, (const sljit_u8
*)cc
, FALSE
, TRUE
, list
))
5821 #ifdef COMPILE_PCRE8
5825 jump
= CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, 255);
5827 OP2(SLJIT_AND
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0x7);
5828 OP2(SLJIT_LSHR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 3);
5829 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)cc
);
5830 OP2(SLJIT_SHL
, TMP2
, 0, SLJIT_IMM
, 1, TMP2
, 0);
5831 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, TMP2
, 0);
5832 add_jump(compiler
, list
, JUMP(SLJIT_NOT_ZERO
));
5834 #ifdef COMPILE_PCRE8
5840 OP1(SLJIT_MOV
, TMP1
, 0, RETURN_ADDR
, 0);
5841 cc
+= 32 / sizeof(pcre_uchar
);
5845 if (needstype
|| needsscript
)
5847 if (needschar
&& !charsaved
)
5848 OP1(SLJIT_MOV
, RETURN_ADDR
, 0, TMP1
, 0);
5850 #ifdef COMPILE_PCRE32
5853 jump
= CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0x10ffff + 1);
5854 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, INVALID_UTF_CHAR
);
5859 OP2(SLJIT_LSHR
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, UCD_BLOCK_SHIFT
);
5860 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), (sljit_sw
)PRIV(ucd_stage1
));
5861 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, UCD_BLOCK_MASK
);
5862 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, UCD_BLOCK_SHIFT
);
5863 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, TMP2
, 0);
5864 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, (sljit_sw
)PRIV(ucd_stage2
));
5865 OP1(SLJIT_MOV_U16
, TMP2
, 0, SLJIT_MEM2(TMP2
, TMP1
), 1);
5867 /* Before anything else, we deal with scripts. */
5870 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)PRIV(ucd_records
) + SLJIT_OFFSETOF(ucd_record
, script
));
5871 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM2(TMP1
, TMP2
), 3);
5875 while (*cc
!= XCL_END
)
5877 if (*cc
== XCL_SINGLE
)
5880 GETCHARINCTEST(c
, cc
);
5882 else if (*cc
== XCL_RANGE
)
5885 GETCHARINCTEST(c
, cc
);
5886 GETCHARINCTEST(c
, cc
);
5890 SLJIT_ASSERT(*cc
== XCL_PROP
|| *cc
== XCL_NOTPROP
);
5895 invertcmp
= (compares
== 0 && list
!= backtracks
);
5896 if (cc
[-1] == XCL_NOTPROP
)
5898 jump
= CMP(SLJIT_EQUAL
^ invertcmp
, TMP1
, 0, SLJIT_IMM
, (int)cc
[1]);
5899 add_jump(compiler
, compares
> 0 ? list
: backtracks
, jump
);
5910 OP1(SLJIT_MOV
, TMP1
, 0, RETURN_ADDR
, 0);
5917 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)PRIV(ucd_records
) + SLJIT_OFFSETOF(ucd_record
, chartype
));
5918 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM2(TMP1
, TMP2
), 3);
5922 OP2(SLJIT_SHL
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 3);
5923 OP1(SLJIT_MOV_U8
, RETURN_ADDR
, 0, SLJIT_MEM1(TMP2
), (sljit_sw
)PRIV(ucd_records
) + SLJIT_OFFSETOF(ucd_record
, chartype
));
5924 typereg
= RETURN_ADDR
;
5930 /* Generating code. */
5937 while (*cc
!= XCL_END
)
5940 invertcmp
= (compares
== 0 && list
!= backtracks
);
5943 if (*cc
== XCL_SINGLE
)
5946 GETCHARINCTEST(c
, cc
);
5948 if (numberofcmps
< 3 && (*cc
== XCL_SINGLE
|| *cc
== XCL_RANGE
))
5950 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(c
- charoffset
));
5951 OP_FLAGS(numberofcmps
== 0 ? SLJIT_MOV
: SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
5954 else if (numberofcmps
> 0)
5956 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(c
- charoffset
));
5957 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_EQUAL
);
5958 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
5963 jump
= CMP(SLJIT_EQUAL
^ invertcmp
, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(c
- charoffset
));
5967 else if (*cc
== XCL_RANGE
)
5970 GETCHARINCTEST(c
, cc
);
5972 GETCHARINCTEST(c
, cc
);
5974 if (numberofcmps
< 3 && (*cc
== XCL_SINGLE
|| *cc
== XCL_RANGE
))
5976 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(c
- charoffset
));
5977 OP_FLAGS(numberofcmps
== 0 ? SLJIT_MOV
: SLJIT_OR
, TMP2
, 0, SLJIT_LESS_EQUAL
);
5980 else if (numberofcmps
> 0)
5982 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(c
- charoffset
));
5983 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_LESS_EQUAL
);
5984 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
5989 jump
= CMP(SLJIT_LESS_EQUAL
^ invertcmp
, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(c
- charoffset
));
5996 SLJIT_ASSERT(*cc
== XCL_PROP
|| *cc
== XCL_NOTPROP
);
5997 if (*cc
== XCL_NOTPROP
)
6004 jump
= JUMP(SLJIT_JUMP
);
6008 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_Lu
- typeoffset
);
6009 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
6010 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_Ll
- typeoffset
);
6011 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6012 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_Lt
- typeoffset
);
6013 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_EQUAL
);
6014 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
6018 c
= PRIV(ucp_typerange
)[(int)cc
[1] * 2];
6020 jump
= CMP(SLJIT_LESS_EQUAL
^ invertcmp
, typereg
, 0, SLJIT_IMM
, PRIV(ucp_typerange
)[(int)cc
[1] * 2 + 1] - c
);
6024 jump
= CMP(SLJIT_EQUAL
^ invertcmp
, typereg
, 0, SLJIT_IMM
, (int)cc
[1] - typeoffset
);
6035 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0xd - 0x9);
6036 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6038 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x85 - 0x9);
6039 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6041 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x180e - 0x9);
6042 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6044 SET_TYPE_OFFSET(ucp_Zl
);
6045 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_Zs
- ucp_Zl
);
6046 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6047 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
6051 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(CHAR_UNDERSCORE
- charoffset
));
6052 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
6056 SET_TYPE_OFFSET(ucp_Ll
);
6057 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_Lu
- ucp_Ll
);
6058 OP_FLAGS((*cc
== PT_ALNUM
) ? SLJIT_MOV
: SLJIT_OR
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6059 SET_TYPE_OFFSET(ucp_Nd
);
6060 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_No
- ucp_Nd
);
6061 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6062 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
6066 other_cases
= PRIV(ucd_caseless_sets
) + cc
[1];
6068 /* At least three characters are required.
6069 Otherwise this case would be handled by the normal code path. */
6070 SLJIT_ASSERT(other_cases
[0] != NOTACHAR
&& other_cases
[1] != NOTACHAR
&& other_cases
[2] != NOTACHAR
);
6071 SLJIT_ASSERT(other_cases
[0] < other_cases
[1] && other_cases
[1] < other_cases
[2]);
6073 /* Optimizing character pairs, if their difference is power of 2. */
6074 if (is_powerof2(other_cases
[1] ^ other_cases
[0]))
6076 if (charoffset
== 0)
6077 OP2(SLJIT_OR
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, other_cases
[1] ^ other_cases
[0]);
6080 OP2(SLJIT_ADD
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)charoffset
);
6081 OP2(SLJIT_OR
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, other_cases
[1] ^ other_cases
[0]);
6083 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP2
, 0, SLJIT_IMM
, other_cases
[1]);
6084 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
6087 else if (is_powerof2(other_cases
[2] ^ other_cases
[1]))
6089 if (charoffset
== 0)
6090 OP2(SLJIT_OR
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, other_cases
[2] ^ other_cases
[1]);
6093 OP2(SLJIT_ADD
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)charoffset
);
6094 OP2(SLJIT_OR
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, other_cases
[1] ^ other_cases
[0]);
6096 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP2
, 0, SLJIT_IMM
, other_cases
[2]);
6097 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
6099 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(other_cases
[0] - charoffset
));
6100 OP_FLAGS(SLJIT_OR
| ((other_cases
[3] == NOTACHAR
) ? SLJIT_SET_Z
: 0), TMP2
, 0, SLJIT_EQUAL
);
6106 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(*other_cases
++ - charoffset
));
6107 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
6110 while (*other_cases
!= NOTACHAR
)
6112 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(*other_cases
++ - charoffset
));
6113 OP_FLAGS(SLJIT_OR
| ((*other_cases
== NOTACHAR
) ? SLJIT_SET_Z
: 0), TMP2
, 0, SLJIT_EQUAL
);
6115 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
6119 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(CHAR_DOLLAR_SIGN
- charoffset
));
6120 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_EQUAL
);
6121 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(CHAR_COMMERCIAL_AT
- charoffset
));
6122 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6123 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(CHAR_GRAVE_ACCENT
- charoffset
));
6124 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6126 SET_CHAR_OFFSET(0xa0);
6127 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)(0xd7ff - charoffset
));
6128 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6130 OP2(SLJIT_SUB
| SLJIT_SET_GREATER_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0xe000 - 0);
6131 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_GREATER_EQUAL
);
6132 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
6136 /* C and Z groups are the farthest two groups. */
6137 SET_TYPE_OFFSET(ucp_Ll
);
6138 OP2(SLJIT_SUB
| SLJIT_SET_GREATER
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_So
- ucp_Ll
);
6139 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_GREATER
);
6141 jump
= CMP(SLJIT_NOT_EQUAL
, typereg
, 0, SLJIT_IMM
, ucp_Cf
- ucp_Ll
);
6143 /* In case of ucp_Cf, we overwrite the result. */
6144 SET_CHAR_OFFSET(0x2066);
6145 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x2069 - 0x2066);
6146 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6148 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x061c - 0x2066);
6149 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6151 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x180e - 0x2066);
6152 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6155 jump
= CMP(SLJIT_ZERO
^ invertcmp
, TMP2
, 0, SLJIT_IMM
, 0);
6159 /* C and Z groups are the farthest two groups. */
6160 SET_TYPE_OFFSET(ucp_Ll
);
6161 OP2(SLJIT_SUB
| SLJIT_SET_GREATER
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_So
- ucp_Ll
);
6162 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_GREATER
);
6164 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_Zs
- ucp_Ll
);
6165 OP_FLAGS(SLJIT_AND
, TMP2
, 0, SLJIT_NOT_EQUAL
);
6167 jump
= CMP(SLJIT_NOT_EQUAL
, typereg
, 0, SLJIT_IMM
, ucp_Cf
- ucp_Ll
);
6169 /* In case of ucp_Cf, we overwrite the result. */
6170 SET_CHAR_OFFSET(0x2066);
6171 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x2069 - 0x2066);
6172 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6174 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x061c - 0x2066);
6175 OP_FLAGS(SLJIT_OR
, TMP2
, 0, SLJIT_EQUAL
);
6178 jump
= CMP(SLJIT_ZERO
^ invertcmp
, TMP2
, 0, SLJIT_IMM
, 0);
6182 SET_TYPE_OFFSET(ucp_Sc
);
6183 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_So
- ucp_Sc
);
6184 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6187 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0x7f);
6188 OP_FLAGS(SLJIT_AND
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6190 SET_TYPE_OFFSET(ucp_Pc
);
6191 OP2(SLJIT_SUB
| SLJIT_SET_LESS_EQUAL
, SLJIT_UNUSED
, 0, typereg
, 0, SLJIT_IMM
, ucp_Ps
- ucp_Pc
);
6192 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_LESS_EQUAL
);
6193 jump
= JUMP(SLJIT_NOT_ZERO
^ invertcmp
);
6197 SLJIT_UNREACHABLE();
6205 add_jump(compiler
, compares
> 0 ? list
: backtracks
, jump
);
6209 set_jumps(found
, LABEL());
6212 #undef SET_TYPE_OFFSET
6213 #undef SET_CHAR_OFFSET
/* Emits the matching code for a zero-width assertion selected by "type".
Every failed check adds a jump to "backtracks".

The visible code covers comparisons of STR_PTR against the "begin" and "str"
fields of jit_arguments, word boundary tests (through the shared
common->wordboundary routine), end of subject / end of line tests that take
the newline convention (common->nltype, common->newline) and the "noteol"
flag into account, start of line tests that honour "notbol", and a step that
moves STR_PTR backwards by "length" characters. The two visible return
statements yield cc + LINK_SIZE.

NOTE(review): apart from OP_NOT_WORD_BOUNDARY / OP_WORD_BOUNDARY the case
labels are missing from this listing, so which opcode each group of
statements belongs to has to be confirmed against the full source. */
6217 static pcre_uchar
*compile_simple_assertion_matchingpath(compiler_common
*common
, pcre_uchar type
, pcre_uchar
*cc
, jump_list
**backtracks
)
6221 struct sljit_jump
*jump
[4];
6223 struct sljit_label
*label
;
6224 #endif /* SUPPORT_UTF */
/* Fail unless STR_PTR equals arguments->begin. */
6229 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
6230 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, begin
));
6231 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, STR_PTR
, 0, TMP1
, 0));
/* Fail unless STR_PTR equals arguments->str. */
6235 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
6236 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, str
));
6237 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, STR_PTR
, 0, TMP1
, 0));
6240 case OP_NOT_WORD_BOUNDARY
:
6241 case OP_WORD_BOUNDARY
:
/* The shared word boundary routine reports its result through the zero flag. */
6242 add_jump(compiler
, &common
->wordboundary
, JUMP(SLJIT_FAST_CALL
));
6243 sljit_set_current_flags(compiler
, SLJIT_SET_Z
);
6244 add_jump(compiler
, backtracks
, JUMP(type
== OP_NOT_WORD_BOUNDARY
? SLJIT_NOT_ZERO
: SLJIT_ZERO
));
6248 /* Requires rather complex checks. */
6249 jump
[0] = CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
/* newline > 255 encodes a fixed two character newline: high byte first. */
6250 if (common
->nltype
== NLTYPE_FIXED
&& common
->newline
> 255)
6252 OP2(SLJIT_ADD
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(2));
6253 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
6254 if (common
->mode
== JIT_COMPILE
)
6255 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, STR_END
, 0));
6258 jump
[1] = CMP(SLJIT_EQUAL
, TMP2
, 0, STR_END
, 0);
6259 OP2(SLJIT_SUB
| SLJIT_SET_LESS
, SLJIT_UNUSED
, 0, TMP2
, 0, STR_END
, 0);
6260 OP_FLAGS(SLJIT_MOV
, TMP2
, 0, SLJIT_LESS
);
6261 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff);
6262 OP_FLAGS(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_NOT_EQUAL
);
6263 add_jump(compiler
, backtracks
, JUMP(SLJIT_NOT_EQUAL
));
6264 check_partial(common
, TRUE
);
6265 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
6268 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
6269 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff));
6270 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, common
->newline
& 0xff));
6272 else if (common
->nltype
== NLTYPE_FIXED
)
6274 OP2(SLJIT_ADD
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
6275 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
6276 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, STR_END
, 0));
6277 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, common
->newline
));
6281 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
6282 jump
[1] = CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_CR
);
6283 OP2(SLJIT_ADD
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(2));
6284 OP2(SLJIT_SUB
| SLJIT_SET_Z
| SLJIT_SET_GREATER
, SLJIT_UNUSED
, 0, TMP2
, 0, STR_END
, 0);
6285 jump
[2] = JUMP(SLJIT_GREATER
);
6286 add_jump(compiler
, backtracks
, JUMP(SLJIT_NOT_EQUAL
) /* LESS */);
6288 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
6289 jump
[3] = CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_NL
);
6290 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
6293 if (common
->nltype
== NLTYPE_ANYCRLF
)
6295 OP2(SLJIT_ADD
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
6296 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP2
, 0, STR_END
, 0));
6297 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_NL
));
/* STR_PTR is saved in LOCALS1 around the character read and restored below,
so the assertion does not consume input. */
6301 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS1
, STR_PTR
, 0);
6302 read_char_range(common
, common
->nlmin
, common
->nlmax
, TRUE
);
6303 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, STR_PTR
, 0, STR_END
, 0));
6304 add_jump(compiler
, &common
->anynewline
, JUMP(SLJIT_FAST_CALL
));
6305 sljit_set_current_flags(compiler
, SLJIT_SET_Z
);
6306 add_jump(compiler
, backtracks
, JUMP(SLJIT_ZERO
));
6307 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS1
);
6313 check_partial(common
, FALSE
);
6317 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0));
6318 check_partial(common
, FALSE
);
/* Fail when the noteol flag of the arguments is set. */
6322 OP1(SLJIT_MOV
, TMP2
, 0, ARGUMENTS
, 0);
6323 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(jit_arguments
, noteol
));
6324 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
6326 if (!common
->endonly
)
6327 compile_simple_assertion_matchingpath(common
, OP_EODN
, cc
, backtracks
);
6330 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0));
6331 check_partial(common
, FALSE
);
6336 jump
[1] = CMP(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0);
6337 OP1(SLJIT_MOV
, TMP2
, 0, ARGUMENTS
, 0);
6338 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(jit_arguments
, noteol
));
6339 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
6340 check_partial(common
, FALSE
);
6341 jump
[0] = JUMP(SLJIT_JUMP
);
6344 if (common
->nltype
== NLTYPE_FIXED
&& common
->newline
> 255)
6346 OP2(SLJIT_ADD
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(2));
6347 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
6348 if (common
->mode
== JIT_COMPILE
)
6349 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER
, TMP2
, 0, STR_END
, 0));
6352 jump
[1] = CMP(SLJIT_LESS_EQUAL
, TMP2
, 0, STR_END
, 0);
6353 /* STR_PTR = STR_END - IN_UCHARS(1) */
6354 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff));
6355 check_partial(common
, TRUE
);
6356 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
6360 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(1));
6361 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff));
6362 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, common
->newline
& 0xff));
6366 peek_char(common
, common
->nlmax
);
6367 check_newlinechar(common
, common
->nltype
, backtracks
, FALSE
);
/* Fail when STR_PTR is past arguments->begin or when notbol is set. */
6373 OP1(SLJIT_MOV
, TMP2
, 0, ARGUMENTS
, 0);
6374 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(jit_arguments
, begin
));
6375 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER
, STR_PTR
, 0, TMP1
, 0));
6376 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(jit_arguments
, notbol
));
6377 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
6381 OP1(SLJIT_MOV
, TMP2
, 0, ARGUMENTS
, 0);
6382 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(jit_arguments
, begin
));
6383 jump
[1] = CMP(SLJIT_GREATER
, STR_PTR
, 0, TMP1
, 0);
6384 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(jit_arguments
, notbol
));
6385 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
6386 jump
[0] = JUMP(SLJIT_JUMP
);
6389 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0));
6390 if (common
->nltype
== NLTYPE_FIXED
&& common
->newline
> 255)
/* Look at the two code units before STR_PTR; TMP1 still holds
arguments->begin at this point, so the bounds check prevents reading
before the start of the subject. */
6392 OP2(SLJIT_SUB
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(2));
6393 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, TMP2
, 0, TMP1
, 0));
6394 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-2));
6395 OP1(MOV_UCHAR
, TMP2
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-1));
6396 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff));
6397 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, common
->newline
& 0xff));
6401 skip_char_back(common
);
6402 read_char_range(common
, common
->nlmin
, common
->nlmax
, TRUE
);
6403 check_newlinechar(common
, common
->nltype
, backtracks
, FALSE
);
/* Step back "length" characters (value stored in the pattern at cc). */
6409 length
= GET(cc
, 0);
6411 return cc
+ LINK_SIZE
;
6412 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
/* Character-by-character variant: TMP2 counts down from length and each
iteration fails if STR_PTR has already reached arguments->begin. */
6416 OP1(SLJIT_MOV
, TMP3
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, begin
));
6417 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, length
);
6419 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, TMP3
, 0));
6420 skip_char_back(common
);
6421 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, TMP2
, 0, SLJIT_IMM
, 1);
6422 JUMPTO(SLJIT_NOT_ZERO
, label
);
/* Fixed-width variant: subtract the whole distance at once, then bounds check. */
6427 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, begin
));
6428 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(length
));
6429 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS
, STR_PTR
, 0, TMP1
, 0));
6431 check_start_used_ptr(common
);
6432 return cc
+ LINK_SIZE
;
6434 SLJIT_UNREACHABLE();
6438 static pcre_uchar
*compile_char1_matchingpath(compiler_common
*common
, pcre_uchar type
, pcre_uchar
*cc
, jump_list
**backtracks
, BOOL check_str_ptr
)
6442 unsigned int c
, oc
, bit
;
6443 compare_context context
;
6444 struct sljit_jump
*jump
[3];
6445 jump_list
*end_list
;
6447 struct sljit_label
*label
;
6449 pcre_uchar propdata
[5];
6451 #endif /* SUPPORT_UTF */
6457 /* Digits are usually 0-9, so it is worth optimizing them. */
6459 detect_partial_match(common
, backtracks
);
6460 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6461 if (common
->utf
&& is_char7_bitset((const sljit_u8
*)common
->ctypes
- cbit_length
+ cbit_digit
, FALSE
))
6462 read_char7_type(common
, type
== OP_NOT_DIGIT
);
6465 read_char8_type(common
, type
== OP_NOT_DIGIT
);
6466 /* Flip the starting bit in the negative case. */
6467 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, ctype_digit
);
6468 add_jump(compiler
, backtracks
, JUMP(type
== OP_DIGIT
? SLJIT_ZERO
: SLJIT_NOT_ZERO
));
6471 case OP_NOT_WHITESPACE
:
6474 detect_partial_match(common
, backtracks
);
6475 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6476 if (common
->utf
&& is_char7_bitset((const sljit_u8
*)common
->ctypes
- cbit_length
+ cbit_space
, FALSE
))
6477 read_char7_type(common
, type
== OP_NOT_WHITESPACE
);
6480 read_char8_type(common
, type
== OP_NOT_WHITESPACE
);
6481 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, ctype_space
);
6482 add_jump(compiler
, backtracks
, JUMP(type
== OP_WHITESPACE
? SLJIT_ZERO
: SLJIT_NOT_ZERO
));
6485 case OP_NOT_WORDCHAR
:
6488 detect_partial_match(common
, backtracks
);
6489 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6490 if (common
->utf
&& is_char7_bitset((const sljit_u8
*)common
->ctypes
- cbit_length
+ cbit_word
, FALSE
))
6491 read_char7_type(common
, type
== OP_NOT_WORDCHAR
);
6494 read_char8_type(common
, type
== OP_NOT_WORDCHAR
);
6495 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, ctype_word
);
6496 add_jump(compiler
, backtracks
, JUMP(type
== OP_WORDCHAR
? SLJIT_ZERO
: SLJIT_NOT_ZERO
));
6501 detect_partial_match(common
, backtracks
);
6502 read_char_range(common
, common
->nlmin
, common
->nlmax
, TRUE
);
6503 if (common
->nltype
== NLTYPE_FIXED
&& common
->newline
> 255)
6505 jump
[0] = CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, (common
->newline
>> 8) & 0xff);
6507 if (common
->mode
!= JIT_PARTIAL_HARD_COMPILE
)
6508 add_jump(compiler
, &end_list
, CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0));
6510 check_str_end(common
, &end_list
);
6512 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
6513 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, common
->newline
& 0xff));
6514 set_jumps(end_list
, LABEL());
6518 check_newlinechar(common
, common
->nltype
, backtracks
, TRUE
);
6523 detect_partial_match(common
, backtracks
);
6527 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
6528 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
6529 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6530 #if defined COMPILE_PCRE8
6531 jump
[0] = CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xc0);
6532 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
6533 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
6534 #elif defined COMPILE_PCRE16
6535 jump
[0] = CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xd800);
6536 OP2(SLJIT_AND
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 0xfc00);
6537 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, SLJIT_IMM
, 0xd800);
6538 OP_FLAGS(SLJIT_MOV
, TMP1
, 0, SLJIT_EQUAL
);
6539 OP2(SLJIT_SHL
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
6540 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
6543 #endif /* COMPILE_PCRE[8|16] */
6547 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
6552 detect_partial_match(common
, backtracks
);
6553 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
6560 propdata
[0] = XCL_HASPROP
;
6561 propdata
[1] = type
== OP_NOTPROP
? XCL_NOTPROP
: XCL_PROP
;
6562 propdata
[2] = cc
[0];
6563 propdata
[3] = cc
[1];
6564 propdata
[4] = XCL_END
;
6566 detect_partial_match(common
, backtracks
);
6567 compile_xclass_matchingpath(common
, propdata
, backtracks
);
6574 detect_partial_match(common
, backtracks
);
6575 read_char_range(common
, common
->bsr_nlmin
, common
->bsr_nlmax
, FALSE
);
6576 jump
[0] = CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_CR
);
6577 /* We don't need to handle soft partial matching case. */
6579 if (common
->mode
!= JIT_PARTIAL_HARD_COMPILE
)
6580 add_jump(compiler
, &end_list
, CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0));
6582 check_str_end(common
, &end_list
);
6583 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
6584 jump
[1] = CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CHAR_NL
);
6585 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
6586 jump
[2] = JUMP(SLJIT_JUMP
);
6588 check_newlinechar(common
, common
->bsr_nltype
, backtracks
, FALSE
);
6589 set_jumps(end_list
, LABEL());
6597 detect_partial_match(common
, backtracks
);
6598 read_char_range(common
, 0x9, 0x3000, type
== OP_NOT_HSPACE
);
6599 add_jump(compiler
, &common
->hspace
, JUMP(SLJIT_FAST_CALL
));
6600 sljit_set_current_flags(compiler
, SLJIT_SET_Z
);
6601 add_jump(compiler
, backtracks
, JUMP(type
== OP_NOT_HSPACE
? SLJIT_NOT_ZERO
: SLJIT_ZERO
));
6607 detect_partial_match(common
, backtracks
);
6608 read_char_range(common
, 0xa, 0x2029, type
== OP_NOT_VSPACE
);
6609 add_jump(compiler
, &common
->vspace
, JUMP(SLJIT_FAST_CALL
));
6610 sljit_set_current_flags(compiler
, SLJIT_SET_Z
);
6611 add_jump(compiler
, backtracks
, JUMP(type
== OP_NOT_VSPACE
? SLJIT_NOT_ZERO
: SLJIT_ZERO
));
6617 detect_partial_match(common
, backtracks
);
6619 add_jump(compiler
, &common
->getucd
, JUMP(SLJIT_FAST_CALL
));
6620 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)PRIV(ucd_records
) + SLJIT_OFFSETOF(ucd_record
, gbprop
));
6621 /* Optimize register allocation: use a real register. */
6622 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
, STACK_TOP
, 0);
6623 OP1(SLJIT_MOV_U8
, STACK_TOP
, 0, SLJIT_MEM2(TMP1
, TMP2
), 3);
6626 jump
[0] = CMP(SLJIT_GREATER_EQUAL
, STR_PTR
, 0, STR_END
, 0);
6627 OP1(SLJIT_MOV
, TMP3
, 0, STR_PTR
, 0);
6629 add_jump(compiler
, &common
->getucd
, JUMP(SLJIT_FAST_CALL
));
6630 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, (sljit_sw
)PRIV(ucd_records
) + SLJIT_OFFSETOF(ucd_record
, gbprop
));
6631 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM2(TMP1
, TMP2
), 3);
6633 OP2(SLJIT_SHL
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, 2);
6634 OP1(SLJIT_MOV_U32
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), (sljit_sw
)PRIV(ucp_gbtable
));
6635 OP1(SLJIT_MOV
, STACK_TOP
, 0, TMP2
, 0);
6636 OP2(SLJIT_SHL
, TMP2
, 0, SLJIT_IMM
, 1, TMP2
, 0);
6637 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, TMP2
, 0);
6638 JUMPTO(SLJIT_NOT_ZERO
, label
);
6640 OP1(SLJIT_MOV
, STR_PTR
, 0, TMP3
, 0);
6642 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
6644 if (common
->mode
== JIT_PARTIAL_HARD_COMPILE
)
6646 jump
[0] = CMP(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0);
6647 /* Since we successfully read a char above, partial matching must occur. */
6648 check_partial(common
, TRUE
);
6658 if (common
->utf
&& HAS_EXTRALEN(*cc
)) length
+= GET_EXTRALEN(*cc
);
6660 if (common
->mode
== JIT_COMPILE
&& check_str_ptr
6661 && (type
== OP_CHAR
|| !char_has_othercase(common
, cc
) || char_get_othercase_bit(common
, cc
) != 0))
6663 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(length
));
6664 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER
, STR_PTR
, 0, STR_END
, 0));
6666 context
.length
= IN_UCHARS(length
);
6667 context
.sourcereg
= -1;
6668 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6669 context
.ucharptr
= 0;
6671 return byte_sequence_compare(common
, type
== OP_CHARI
, cc
, &context
, backtracks
);
6675 detect_partial_match(common
, backtracks
);
6685 if (type
== OP_CHAR
|| !char_has_othercase(common
, cc
))
6687 read_char_range(common
, c
, c
, FALSE
);
6688 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, c
));
6691 oc
= char_othercase(common
, c
);
6692 read_char_range(common
, c
< oc
? c
: oc
, c
> oc
? c
: oc
, FALSE
);
6694 if (is_powerof2(bit
))
6696 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, bit
);
6697 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, c
| bit
));
6700 jump
[0] = CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, c
);
6701 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, oc
));
6708 detect_partial_match(common
, backtracks
);
6713 #ifdef COMPILE_PCRE8
6717 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), 0);
6718 if (type
== OP_NOT
|| !char_has_othercase(common
, cc
))
6719 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, c
));
6722 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6723 OP2(SLJIT_OR
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0x20);
6724 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, c
| 0x20));
6726 /* Skip the variable-length character. */
6727 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
6728 jump
[0] = CMP(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, 0xc0);
6729 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)PRIV(utf8_table4
) - 0xc0);
6730 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP1
, 0);
6735 #endif /* COMPILE_PCRE8 */
6737 GETCHARLEN(c
, cc
, length
);
6741 #endif /* SUPPORT_UTF */
6744 if (type
== OP_NOT
|| !char_has_othercase(common
, cc
))
6746 read_char_range(common
, c
, c
, TRUE
);
6747 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, c
));
6751 oc
= char_othercase(common
, c
);
6752 read_char_range(common
, c
< oc
? c
: oc
, c
> oc
? c
: oc
, TRUE
);
6754 if (is_powerof2(bit
))
6756 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, bit
);
6757 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, c
| bit
));
6761 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, c
));
6762 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, oc
));
6770 detect_partial_match(common
, backtracks
);
6772 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6773 bit
= (common
->utf
&& is_char7_bitset((const sljit_u8
*)cc
, type
== OP_NCLASS
)) ? 127 : 255;
6774 read_char_range(common
, 0, bit
, type
== OP_NCLASS
);
6776 read_char_range(common
, 0, 255, type
== OP_NCLASS
);
6779 if (check_class_ranges(common
, (const sljit_u8
*)cc
, type
== OP_NCLASS
, FALSE
, backtracks
))
6780 return cc
+ 32 / sizeof(pcre_uchar
);
6782 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6786 jump
[0] = CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, bit
);
6787 if (type
== OP_CLASS
)
6789 add_jump(compiler
, backtracks
, jump
[0]);
6793 #elif !defined COMPILE_PCRE8
6794 jump
[0] = CMP(SLJIT_GREATER
, TMP1
, 0, SLJIT_IMM
, 255);
6795 if (type
== OP_CLASS
)
6797 add_jump(compiler
, backtracks
, jump
[0]);
6800 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6802 OP2(SLJIT_AND
, TMP2
, 0, TMP1
, 0, SLJIT_IMM
, 0x7);
6803 OP2(SLJIT_LSHR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 3);
6804 OP1(SLJIT_MOV_U8
, TMP1
, 0, SLJIT_MEM1(TMP1
), (sljit_sw
)cc
);
6805 OP2(SLJIT_SHL
, TMP2
, 0, SLJIT_IMM
, 1, TMP2
, 0);
6806 OP2(SLJIT_AND
| SLJIT_SET_Z
, SLJIT_UNUSED
, 0, TMP1
, 0, TMP2
, 0);
6807 add_jump(compiler
, backtracks
, JUMP(SLJIT_ZERO
));
6809 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6810 if (jump
[0] != NULL
)
6813 return cc
+ 32 / sizeof(pcre_uchar
);
6815 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6818 detect_partial_match(common
, backtracks
);
6819 compile_xclass_matchingpath(common
, cc
+ LINK_SIZE
, backtracks
);
6820 return cc
+ GET(cc
, 0) - 1;
6823 SLJIT_UNREACHABLE();
/* Emits matching code for the literal-character opcode(s) starting at cc.
   common     - compiler state; common->utf is consulted for multi-unit characters.
   cc         - current position in the compiled pattern.
   ccend      - end of the pattern range (not referenced in the lines visible in
                this excerpt).
   backtracks - jump list that receives the failure jumps emitted here.
   The visible return hands the opcode to compile_char1_matchingpath() and
   returns its result.
   NOTE(review): several lines of this function (declarations of `size` and
   `compiler`, the loop head, the statements guarded by the othercase checks and
   the return on the fixed-length path) are missing from this excerpt; confirm
   against the full source before relying on these notes. */
6827 static SLJIT_INLINE pcre_uchar
*compile_charn_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
, jump_list
**backtracks
)
6829 /* This function consumes at least one input character. */
6830 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6832 pcre_uchar
*ccbegin
= cc
;
6833 compare_context context
;
/* Length accumulation: in UTF mode `size` grows by GET_EXTRALEN(cc[1]) for a
   character that HAS_EXTRALEN reports as multi-unit. */
6846 if (common
->utf
&& HAS_EXTRALEN(cc
[1]))
6847 size
+= GET_EXTRALEN(cc
[1]);
6850 else if (*cc
== OP_CHARI
)
/* Caseless character that has an other case but no single distinguishing bit
   (char_get_othercase_bit() == 0). The statement taken in that situation is not
   visible here. */
6856 if (char_has_othercase(common
, cc
+ 1) && char_get_othercase_bit(common
, cc
+ 1) == 0)
6858 else if (HAS_EXTRALEN(cc
[1]))
6859 size
+= GET_EXTRALEN(cc
[1]);
6863 if (char_has_othercase(common
, cc
+ 1) && char_get_othercase_bit(common
, cc
+ 1) == 0)
/* context.length is kept in bytes: IN_UCHARS(size) is added per opcode. */
6870 context
.length
+= IN_UCHARS(size
);
/* Accumulation continues while the last opcode contributed a size and the
   total has not exceeded 128. */
6872 while (size
> 0 && context
.length
<= 128);
6875 if (context
.length
> 0)
6877 /* We have a fixed-length byte sequence. */
/* Advance STR_PTR over the whole sequence up front; if that goes past STR_END
   the emitted code jumps to backtracks. */
6878 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, context
.length
);
6879 add_jump(compiler
, backtracks
, CMP(SLJIT_GREATER
, STR_PTR
, 0, STR_END
, 0));
6881 context
.sourcereg
= -1;
6882 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6883 context
.ucharptr
= 0;
/* byte_sequence_compare() is called once per opcode until context.length is 0
   (presumably it decrements context.length itself - confirm). */
6885 do cc
= byte_sequence_compare(common
, *cc
== OP_CHARI
, cc
+ 1, &context
, backtracks
); while (context
.length
> 0);
6889 /* A non-fixed length character will be checked if length == 0. */
6890 return compile_char1_matchingpath(common
, *cc
, cc
+ 1, backtracks
, TRUE
);
6893 /* Forward definitions. */
/* compile_matchingpath and compile_backtrackingpath are declared here because
   functions further down in this file call them before they are defined. */
6894 static void compile_matchingpath(compiler_common
*, pcre_uchar
*, pcre_uchar
*, backtrack_common
*);
6895 static void compile_backtrackingpath(compiler_common
*, struct backtrack_common
*);
/* PUSH_BACKTRACK(size, ccstart, error)
     Allocates (size) bytes with sljit_alloc_memory(), checks
     sljit_get_compiler_error(), zero-fills the record with memset, links it in
     front of parent->top (backtrack->prev = parent->top; parent->top =
     backtrack) and stores ccstart in backtrack->cc.
     The expansion relies on `compiler`, `backtrack` and `parent` being in scope
     at the call site.
     NOTE(review): the statement executed when the compiler reports an error is
     not visible in this excerpt; presumably it returns `error` - confirm.
   PUSH_BACKTRACK_NOVALUE(size, ccstart)
     Same visible steps as PUSH_BACKTRACK but without the `error` argument.
   BACKTRACK_AS(type)
     Casts the in-scope `backtrack` pointer to (type *). */
6897 #define PUSH_BACKTRACK(size, ccstart, error) \
6900 backtrack = sljit_alloc_memory(compiler, (size)); \
6901 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6903 memset(backtrack, 0, size); \
6904 backtrack->prev = parent->top; \
6905 backtrack->cc = (ccstart); \
6906 parent->top = backtrack; \
6910 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
6913 backtrack = sljit_alloc_memory(compiler, (size)); \
6914 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6916 memset(backtrack, 0, size); \
6917 backtrack->prev = parent->top; \
6918 backtrack->cc = (ccstart); \
6919 parent->top = backtrack; \
6923 #define BACKTRACK_AS(type) ((type *)backtrack)
/* Emits code that searches the groups sharing a duplicated name
   (OP_DNREF / OP_DNREFI, as asserted below).
   common     - compiler state; name_table, name_entry_size and jscript_compat
                are read.
   cc         - points at the opcode; GET2(cc, 1) indexes the name table and
                GET2(cc, 1 + IMM2_SIZE) gives the entry count.
   backtracks - optional jump list (may be NULL) used for the last slot only.
   For every slot inspected, the address of its OVECTOR entry is loaded into
   TMP2 with GET_LOCAL_BASE, so TMP2 designates the chosen slot when the code
   reaches the final label.
   NOTE(review): the loop construct around the first slot check is not visible
   in this excerpt. */
6925 static void compile_dnref_search(compiler_common
*common
, pcre_uchar
*cc
, jump_list
**backtracks
)
6927 /* The OVECTOR offset goes to TMP2. */
6929 int count
= GET2(cc
, 1 + IMM2_SIZE
);
6930 pcre_uchar
*slot
= common
->name_table
+ GET2(cc
, 1) * common
->name_entry_size
;
6931 unsigned int offset
;
6932 jump_list
*found
= NULL
;
6934 SLJIT_ASSERT(*cc
== OP_DNREF
|| *cc
== OP_DNREFI
);
/* TMP1 holds OVECTOR(1), the value every slot is compared against (another
   comment in this file describes it as the "string begin - 1" constant). */
6936 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1));
/* The group number stored in the slot is doubled to index its OVECTOR pair. */
6941 offset
= GET2(slot
, 0) << 1;
6942 GET_LOCAL_BASE(TMP2
, 0, OVECTOR(offset
));
/* A slot whose stored value differs from TMP1 jumps to `found`. */
6943 add_jump(compiler
, &found
, CMP(SLJIT_NOT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0));
6944 slot
+= common
->name_entry_size
;
/* Final slot: no jump to `found` is emitted; when a backtrack list was supplied
   and jscript_compat is off, a value equal to TMP1 jumps to backtracks. */
6947 offset
= GET2(slot
, 0) << 1;
6948 GET_LOCAL_BASE(TMP2
, 0, OVECTOR(offset
));
6949 if (backtracks
!= NULL
&& !common
->jscript_compat
)
6950 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0));
6952 set_jumps(found
, LABEL());
/* Emits the code that matches the text of a back reference once.
   common     - compiler state; utf, jscript_compat, mode and the shared
                casefulcmp / caselesscmp jump lists are used below.
   cc         - points at the reference opcode; `ref` is TRUE for
                OP_REF / OP_REFI.
   backtracks - jump list receiving the failure jumps emitted here.
   withchecks - together with !jscript_compat, makes a captured start equal to
                OVECTOR(1) jump to backtracks.
   emptyfail  - its use is not visible in this excerpt.
   NOTE(review): the if/else lines that choose between the OVECTOR(offset) form
   and the SLJIT_MEM1(TMP2) form of each load are missing from this excerpt;
   presumably `ref` selects between them - confirm against the full source. */
6955 static void compile_ref_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, jump_list
**backtracks
, BOOL withchecks
, BOOL emptyfail
)
6958 BOOL ref
= (*cc
== OP_REF
|| *cc
== OP_REFI
);
6960 struct sljit_jump
*jump
= NULL
;
6961 struct sljit_jump
*partial
;
6962 struct sljit_jump
*nopartial
;
/* The group number from the pattern is doubled to index its OVECTOR pair. */
6966 offset
= GET2(cc
, 1) << 1;
6967 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
));
6968 /* OVECTOR(1) contains the "string begin - 1" constant. */
6969 if (withchecks
&& !common
->jscript_compat
)
6970 add_jump(compiler
, backtracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1)));
/* Alternative load of the start value through the pointer held in TMP2. */
6973 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), 0);
6975 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* UTF caseless comparison is delegated to the C helper do_utf_caselesscmp,
   called with four word-sized arguments. */
6976 if (common
->utf
&& *cc
== OP_REFI
)
6978 SLJIT_ASSERT(TMP1
== SLJIT_R0
&& STACK_TOP
== SLJIT_R1
);
6980 OP1(SLJIT_MOV
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
6982 OP1(SLJIT_MOV
, SLJIT_R2
, 0, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
));
/* TMP1 == SLJIT_R2 here means both loaded values are equal; the jump is
   resolved at the end of the function. */
6985 jump
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_R2
, 0);
6987 /* No free saved registers so save data on stack. */
6988 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
, STACK_TOP
, 0);
6989 OP1(SLJIT_MOV
, SLJIT_R1
, 0, STR_PTR
, 0);
6990 OP1(SLJIT_MOV
, SLJIT_R3
, 0, STR_END
, 0);
6991 sljit_emit_icall(compiler
, SLJIT_CALL
, SLJIT_RET(SW
) | SLJIT_ARG1(SW
) | SLJIT_ARG2(SW
) | SLJIT_ARG3(SW
) | SLJIT_ARG4(SW
), SLJIT_IMM
, SLJIT_FUNC_OFFSET(do_utf_caselesscmp
));
/* Restore STACK_TOP and take the helper's return value as the new STR_PTR. */
6992 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
6993 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_RETURN_REG
, 0);
/* In JIT_COMPILE mode a helper result <= 1 jumps to backtracks. */
6995 if (common
->mode
== JIT_COMPILE
)
6996 add_jump(compiler
, backtracks
, CMP(SLJIT_LESS_EQUAL
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, 1));
/* Otherwise: result < 1 jumps to backtracks, result == 1 falls into the
   partial-match handling (STR_PTR = STR_END, check_partial, then backtrack),
   and any other result skips it via `nopartial`. */
6999 OP2(SLJIT_SUB
| SLJIT_SET_Z
| SLJIT_SET_LESS
, SLJIT_UNUSED
, 0, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, 1);
7001 add_jump(compiler
, backtracks
, JUMP(SLJIT_LESS
));
7003 nopartial
= JUMP(SLJIT_NOT_EQUAL
);
7004 OP1(SLJIT_MOV
, STR_PTR
, 0, STR_END
, 0);
7005 check_partial(common
, FALSE
);
7006 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
7007 JUMPHERE(nopartial
);
7011 #endif /* SUPPORT_UTF && SUPPORT_UCP */
/* Generic path: TMP2 = (second value of the pair) - TMP1, i.e. the byte length
   of the referenced text; the zero flag is set for an empty reference. */
7014 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), TMP1
, 0);
7016 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
), TMP1
, 0);
7019 jump
= JUMP(SLJIT_ZERO
);
/* Advance STR_PTR by that length and test it against STR_END; in JIT_COMPILE
   mode running past the end is a plain backtrack. */
7021 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, TMP2
, 0);
7022 partial
= CMP(SLJIT_GREATER
, STR_PTR
, 0, STR_END
, 0);
7023 if (common
->mode
== JIT_COMPILE
)
7024 add_jump(compiler
, backtracks
, partial
);
/* Fast call into the shared compare routine; a non-zero TMP2 afterwards jumps
   to backtracks. */
7026 add_jump(compiler
, *cc
== OP_REF
? &common
->casefulcmp
: &common
->caselesscmp
, JUMP(SLJIT_FAST_CALL
));
7027 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
/* Partial-matching modes: compare only the part that fits before STR_END. */
7029 if (common
->mode
!= JIT_COMPILE
)
7031 nopartial
= JUMP(SLJIT_JUMP
);
7033 /* TMP2 -= STR_END - STR_PTR */
7034 OP2(SLJIT_SUB
, TMP2
, 0, TMP2
, 0, STR_PTR
, 0);
7035 OP2(SLJIT_ADD
, TMP2
, 0, TMP2
, 0, STR_END
, 0);
7036 partial
= CMP(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0);
7037 OP1(SLJIT_MOV
, STR_PTR
, 0, STR_END
, 0);
7038 add_jump(compiler
, *cc
== OP_REF
? &common
->casefulcmp
: &common
->caselesscmp
, JUMP(SLJIT_FAST_CALL
));
7039 add_jump(compiler
, backtracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
7041 check_partial(common
, FALSE
);
7042 add_jump(compiler
, backtracks
, JUMP(SLJIT_JUMP
));
7043 JUMPHERE(nopartial
);
/* NOTE(review): the condition guarding this statement is not visible in this
   excerpt; as shown, the saved `jump` is routed to backtracks. */
7050 add_jump(compiler
, backtracks
, jump
);
/* Emits matching code for a back reference followed by a repeat operator.
   common - compiler state.
   cc     - points at the reference opcode; `ref` is TRUE for OP_REF / OP_REFI.
   parent - backtrack record under which the new ref_iterator_backtrack is
            linked by PUSH_BACKTRACK.
   The repeat type is read from cc[1 + IMM2_SIZE]; its lowest bit selects the
   minimizing form. min and max start at 0 and are read with GET2 for the
   repeat form that stores explicit limits.
   NOTE(review): most of the branch structure (the switch on the repeat type,
   the if/else lines separating the `ref` and non-`ref` variants, label
   definitions and the return statement) is missing from this excerpt. The
   comments below describe only the statements that are visible. */
7056 static SLJIT_INLINE pcre_uchar
*compile_ref_iterator_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
7059 BOOL ref
= (*cc
== OP_REF
|| *cc
== OP_REFI
);
7060 backtrack_common
*backtrack
;
7063 struct sljit_label
*label
;
7064 struct sljit_jump
*zerolength
;
7065 struct sljit_jump
*jump
= NULL
;
7066 pcre_uchar
*ccbegin
= cc
;
7067 int min
= 0, max
= 0;
7070 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack
), cc
, NULL
);
7073 offset
= GET2(cc
, 1) << 1;
7076 type
= cc
[1 + IMM2_SIZE
];
/* OP_CRSTAR is asserted to be even, so an odd type value marks the minimizing
   variant of a repeat. */
7078 SLJIT_COMPILE_ASSERT((OP_CRSTAR
& 0x1) == 0, crstar_opcode_must_be_even
);
7079 minimize
= (type
& 0x1) != 0;
/* Skip the opcode, its 2-byte-style operand and the repeat type byte. */
7086 cc
+= 1 + IMM2_SIZE
+ 1;
7092 cc
+= 1 + IMM2_SIZE
+ 1;
7098 cc
+= 1 + IMM2_SIZE
+ 1;
/* Repeat form with explicit limits: two extra IMM2 operands follow. */
7102 min
= GET2(cc
, 1 + IMM2_SIZE
+ 1);
7103 max
= GET2(cc
, 1 + IMM2_SIZE
+ 1 + IMM2_SIZE
);
7104 cc
+= 1 + IMM2_SIZE
+ 1 + 2 * IMM2_SIZE
;
7107 SLJIT_UNREACHABLE();
/* Two stack slots: STACK(0) keeps STR_PTR, STACK(1) is initialised to 0. */
7115 allocate_stack(common
, 2);
7117 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
));
7118 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
7119 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), SLJIT_IMM
, 0);
7120 /* Temporary release of STR_PTR. */
7121 OP2(SLJIT_ADD
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
7122 /* Handles both invalid and empty cases. Since the minimum repeat,
7123 is zero the invalid case is basically the same as an empty case. */
7125 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
/* Duplicate-name variant: the slot pointer left in TMP2 by
   compile_dnref_search is saved in POSSESSIVE1 for later iterations. */
7128 compile_dnref_search(common
, ccbegin
, NULL
);
7129 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), 0);
7130 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE1
, TMP2
, 0);
7131 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
));
7133 /* Restore if not zero length. */
7134 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
7138 allocate_stack(common
, 1);
7140 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
));
7141 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
/* A start value equal to OVECTOR(1) goes to the backtrack list; equal start
   and end values take the zero-length exit. */
7144 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1)));
7145 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
7149 compile_dnref_search(common
, ccbegin
, &backtrack
->topbacktracks
);
7150 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), 0);
7151 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE1
, TMP2
, 0);
7152 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
));
/* An iteration counter is kept in POSSESSIVE0 only when min or max exceeds 1. */
7156 if (min
> 1 || max
> 1)
7157 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE0
, SLJIT_IMM
, 0);
/* Reload the saved slot pointer into TMP2 before matching the reference. */
7161 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE1
);
7162 compile_ref_matchingpath(common
, ccbegin
, &backtrack
->topbacktracks
, FALSE
, FALSE
);
7164 if (min
> 1 || max
> 1)
/* Increment the counter in POSSESSIVE0. */
7166 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE0
);
7167 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
7168 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE0
, TMP1
, 0);
/* Fewer than min iterations so far: jump back to `label`. */
7170 CMPTO(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, min
, label
);
/* Once max iterations are reached, skip pushing another retry position. */
7173 jump
= CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, max
);
7174 allocate_stack(common
, 1);
7175 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
7176 JUMPTO(SLJIT_JUMP
, label
);
7183 /* Includes min > 1 case as well. */
7184 allocate_stack(common
, 1);
7185 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
7186 JUMPTO(SLJIT_JUMP
, label
);
7189 JUMPHERE(zerolength
);
7190 BACKTRACK_AS(ref_iterator_backtrack
)->matchingpath
= LABEL();
7192 count_match(common
);
/* Stack layout used from here on: two slots when `ref`, three otherwise (the
   third, STACK(2), stores the slot pointer from compile_dnref_search). */
7196 allocate_stack(common
, ref
? 2 : 3);
7198 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
));
7199 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7200 if (type
!= OP_CRMINSTAR
)
7201 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), SLJIT_IMM
, 0);
7205 /* Handles both invalid and empty cases. Since the minimum repeat,
7206 is zero the invalid case is basically the same as an empty case. */
7208 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
7211 compile_dnref_search(common
, ccbegin
, NULL
);
7212 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), 0);
7213 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(2), TMP2
, 0);
7214 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
));
7216 /* Length is non-zero, we can match real repeats. */
7217 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
7218 jump
= JUMP(SLJIT_JUMP
);
7224 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1)));
7225 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
7229 compile_dnref_search(common
, ccbegin
, &backtrack
->topbacktracks
);
7230 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), 0);
7231 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(2), TMP2
, 0);
7232 zerolength
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_MEM1(TMP2
), sizeof(sljit_sw
));
/* Re-entry point recorded in the backtrack record. */
7236 BACKTRACK_AS(ref_iterator_backtrack
)->matchingpath
= LABEL();
/* The counter in STACK(1) having reached max goes to the backtrack list. */
7238 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_GREATER_EQUAL
, SLJIT_MEM1(STACK_TOP
), STACK(1), SLJIT_IMM
, max
));
7241 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(2));
7242 compile_ref_matchingpath(common
, ccbegin
, &backtrack
->topbacktracks
, TRUE
, TRUE
);
7243 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
/* Increment the counter in STACK(1) and loop while it is below min. */
7247 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
7248 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
7249 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP1
, 0);
7250 CMPTO(SLJIT_LESS
, TMP1
, 0, SLJIT_IMM
, min
, BACKTRACK_AS(ref_iterator_backtrack
)->matchingpath
);
7253 OP2(SLJIT_ADD
, SLJIT_MEM1(STACK_TOP
), STACK(1), SLJIT_MEM1(STACK_TOP
), STACK(1), SLJIT_IMM
, 1);
7257 JUMPHERE(zerolength
);
7259 count_match(common
);
/* Emits matching code for the recursion opcode at cc.
   common - compiler state; start, entries, has_set_som and mark_ptr are used.
   cc     - points at the opcode; GET(cc, 1) is the target's offset from
            common->start.
   parent - backtrack record under which the new recurse_backtrack is linked.
   Returns cc + 1 + LINK_SIZE on both visible return paths.
   NOTE(review): the statements executed on allocation failure, on a successful
   list lookup, and the linking of a new entry through `prev` are not visible in
   this excerpt. */
7263 static SLJIT_INLINE pcre_uchar
*compile_recurse_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
7266 backtrack_common
*backtrack
;
7267 recurse_entry
*entry
= common
->entries
;
7268 recurse_entry
*prev
= NULL
;
7269 sljit_sw start
= GET(cc
, 1);
7270 pcre_uchar
*start_cc
;
7271 BOOL needs_control_head
;
7273 PUSH_BACKTRACK(sizeof(recurse_backtrack
), cc
, NULL
);
7275 /* Inlining simple patterns. */
/* When get_framesize() reports no_stack for the target, its body is compiled
   in place and the backtrack record is flagged as inlined. */
7276 if (get_framesize(common
, common
->start
+ start
, NULL
, TRUE
, &needs_control_head
) == no_stack
)
7278 start_cc
= common
->start
+ start
;
7279 compile_matchingpath(common
, next_opcode(common
, start_cc
), bracketend(start_cc
) - (1 + LINK_SIZE
), backtrack
);
7280 BACKTRACK_AS(recurse_backtrack
)->inlined_pattern
= TRUE
;
7281 return cc
+ 1 + LINK_SIZE
;
/* Look for an existing recurse_entry with the same start offset. */
7284 while (entry
!= NULL
)
7286 if (entry
->start
== start
)
7289 entry
= entry
->next
;
/* Create a fresh entry with no entry label and no pending calls yet. */
7294 entry
= sljit_alloc_memory(compiler
, sizeof(recurse_entry
));
7295 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
7298 entry
->entry
= NULL
;
7299 entry
->calls
= NULL
;
7300 entry
->start
= start
;
7305 common
->entries
= entry
;
/* Save OVECTOR(0) and/or the mark pointer on the stack before the call: two
   slots when both are in use, one slot when only one of them is. */
7308 if (common
->has_set_som
&& common
->mark_ptr
!= 0)
7310 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0));
7311 allocate_stack(common
, 2);
7312 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
);
7313 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
7314 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP1
, 0);
7316 else if (common
->has_set_som
|| common
->mark_ptr
!= 0)
7318 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->has_set_som
? (int)(OVECTOR(0)) : common
->mark_ptr
);
7319 allocate_stack(common
, 1);
7320 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
/* If the target has no label yet, queue the fast call on entry->calls;
   otherwise call the known label directly. */
7323 if (entry
->entry
== NULL
)
7324 add_jump(compiler
, &entry
->calls
, JUMP(SLJIT_FAST_CALL
));
7326 JUMPTO(SLJIT_FAST_CALL
, entry
->entry
);
7327 /* Leave if the match is failed. */
7328 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0));
7329 return cc
+ 1 + LINK_SIZE
;
/* C helper invoked from generated code (see the sljit_emit_icall in
   compile_callout_matchingpath): completes the callout block and calls the
   user-supplied callout function.
   arguments     - begin, end, offsets, offset_count and callout_data are read.
   callout_block - partially filled by the generated code. On entry its
                   `subject` and `offset_vector` fields hold pointers that are
                   converted to offsets below before the fields are overwritten
                   with their final values.
   jit_ovector   - pointer pairs that are converted to int offsets relative to
                   `begin` and written into arguments->offsets.
   Returns the value returned by the callout function.
   NOTE(review): the declaration of `i` and the statement executed when no
   callout function is installed are not visible in this excerpt. */
7332 static sljit_s32 SLJIT_FUNC
do_callout(struct jit_arguments
*arguments
, PUBL(callout_block
) *callout_block
, pcre_uchar
**jit_ovector
)
7334 const pcre_uchar
*begin
= arguments
->begin
;
7335 int *offset_vector
= arguments
->offsets
;
7336 int offset_count
= arguments
->offset_count
;
7339 if (PUBL(callout
) == NULL
)
7342 callout_block
->version
= 2;
7343 callout_block
->callout_data
= arguments
->callout_data
;
7345 /* Offsets in subject. */
7346 callout_block
->subject_length
= arguments
->end
- arguments
->begin
;
/* These two reads must happen before `subject` and `offset_vector` are
   assigned below, because the fields still carry the temporary pointers. */
7347 callout_block
->start_match
= (pcre_uchar
*)callout_block
->subject
- arguments
->begin
;
7348 callout_block
->current_position
= (pcre_uchar
*)callout_block
->offset_vector
- arguments
->begin
;
7349 #if defined COMPILE_PCRE8
7350 callout_block
->subject
= (PCRE_SPTR
)begin
;
7351 #elif defined COMPILE_PCRE16
7352 callout_block
->subject
= (PCRE_SPTR16
)begin
;
7353 #elif defined COMPILE_PCRE32
7354 callout_block
->subject
= (PCRE_SPTR32
)begin
;
7357 /* Convert and copy the JIT offset vector to the offset_vector array. */
7358 callout_block
->capture_top
= 0;
7359 callout_block
->offset_vector
= offset_vector
;
/* Pairs start at index 2; capture_top remembers the index of the last pair
   whose start pointer is not below `begin`. */
7360 for (i
= 2; i
< offset_count
; i
+= 2)
7362 offset_vector
[i
] = jit_ovector
[i
] - begin
;
7363 offset_vector
[i
+ 1] = jit_ovector
[i
+ 1] - begin
;
7364 if (jit_ovector
[i
] >= begin
)
7365 callout_block
->capture_top
= i
;
/* Convert the pair index into (pair number + 1). */
7368 callout_block
->capture_top
= (callout_block
->capture_top
>> 1) + 1;
/* The first pair is reported as unset (-1) when the vector has room for it. */
7369 if (offset_count
> 0)
7370 offset_vector
[0] = -1;
7371 if (offset_count
> 1)
7372 offset_vector
[1] = -1;
7373 return (*PUBL(callout
))(callout_block
);
7376 /* Aligning to 8 byte. */
/* CALLOUT_ARG_SIZE: sizeof the public callout_block rounded up to a multiple
   of 8. Both helper macros are #undef'd right after the function below. */
7377 #define CALLOUT_ARG_SIZE \
7378 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
/* CALLOUT_ARG_OFFSET(arg): byte offset of field `arg` inside callout_block. */
7380 #define CALLOUT_ARG_OFFSET(arg) \
7381 SLJIT_OFFSETOF(PUBL(callout_block), arg)
/* Emits code that builds a callout_block on the JIT stack, calls do_callout()
   and acts on its return value.
   common - compiler state; capture_last_ptr (asserted non-zero), mark_ptr,
            forced_quit and forced_quit_label are used.
   cc     - points at the callout opcode: cc[1] is the callout number,
            GET(cc, 2) the pattern position, GET(cc, 2 + LINK_SIZE) the next
            item length.
   parent - backtrack record under which the new backtrack_common is linked.
   Returns cc + 2 + 2 * LINK_SIZE. */
7383 static SLJIT_INLINE pcre_uchar
*compile_callout_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
7386 backtrack_common
*backtrack
;
7388 PUSH_BACKTRACK(sizeof(backtrack_common
), cc
, NULL
);
/* Reserve room for the whole block, measured in machine words. */
7390 allocate_stack(common
, CALLOUT_ARG_SIZE
/ sizeof(sljit_sw
));
7392 SLJIT_ASSERT(common
->capture_last_ptr
!= 0);
7393 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
);
7394 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
7395 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(STACK_TOP
), CALLOUT_ARG_OFFSET(callout_number
), SLJIT_IMM
, cc
[1]);
7396 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(STACK_TOP
), CALLOUT_ARG_OFFSET(capture_last
), TMP2
, 0);
7398 /* These pointer sized fields temporarily store internal variables. */
/* do_callout() reads STR_PTR from offset_vector and OVECTOR(0) from subject
   before overwriting both fields with their final values. */
7399 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0));
7400 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), CALLOUT_ARG_OFFSET(offset_vector
), STR_PTR
, 0);
7401 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), CALLOUT_ARG_OFFSET(subject
), TMP2
, 0);
/* The mark field gets the value loaded from jit_arguments.mark_ptr when marks
   are in use, otherwise the immediate 0. */
7403 if (common
->mark_ptr
!= 0)
7404 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, mark_ptr
));
7405 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(STACK_TOP
), CALLOUT_ARG_OFFSET(pattern_position
), SLJIT_IMM
, GET(cc
, 2));
7406 OP1(SLJIT_MOV_S32
, SLJIT_MEM1(STACK_TOP
), CALLOUT_ARG_OFFSET(next_item_length
), SLJIT_IMM
, GET(cc
, 2 + LINK_SIZE
));
7407 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), CALLOUT_ARG_OFFSET(mark
), (common
->mark_ptr
!= 0) ? TMP2
: SLJIT_IMM
, 0);
7409 /* Needed to save important temporary registers. */
7410 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
, STACK_TOP
, 0);
7411 /* SLJIT_R0 = arguments */
/* Second argument: the block just built; third: base of the JIT ovector. */
7412 OP1(SLJIT_MOV
, SLJIT_R1
, 0, STACK_TOP
, 0);
7413 GET_LOCAL_BASE(SLJIT_R2
, 0, OVECTOR_START
);
7414 sljit_emit_icall(compiler
, SLJIT_CALL
, SLJIT_RET(S32
) | SLJIT_ARG1(SW
) | SLJIT_ARG2(SW
) | SLJIT_ARG3(SW
), SLJIT_IMM
, SLJIT_FUNC_OFFSET(do_callout
));
7415 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
7416 free_stack(common
, CALLOUT_ARG_SIZE
/ sizeof(sljit_sw
));
7418 /* Check return value. */
/* Positive result: backtrack. Any other non-zero result (negative, since the
   positive case has already jumped): forced quit. Zero: fall through. */
7419 OP2(SLJIT_SUB32
| SLJIT_SET_Z
| SLJIT_SET_SIG_GREATER
, SLJIT_UNUSED
, 0, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, 0);
7420 add_jump(compiler
, &backtrack
->topbacktracks
, JUMP(SLJIT_SIG_GREATER32
));
7421 if (common
->forced_quit_label
== NULL
)
7422 add_jump(compiler
, &common
->forced_quit
, JUMP(SLJIT_NOT_EQUAL32
) /* SIG_LESS */);
7424 JUMPTO(SLJIT_NOT_EQUAL32
/* SIG_LESS */, common
->forced_quit_label
);
7425 return cc
+ 2 + 2 * LINK_SIZE
;
7428 #undef CALLOUT_ARG_SIZE
7429 #undef CALLOUT_ARG_OFFSET
/* Examines the opcode(s) starting at cc and returns a BOOL.
   compile_assert_matchingpath() calls it with the first opcode inside a plain
   OP_BRA assertion (ccbegin + 1 + LINK_SIZE).
   Visible here: a case group for OP_NOT_WORD_BOUNDARY / OP_WORD_BOUNDARY and a
   step that advances cc by the opcode's entry in the OP_lengths table.
   NOTE(review): the loop/switch structure, the remaining cases and every return
   statement are missing from this excerpt; judging by the name it presumably
   reports whether STR_PTR has to be saved around the assertion - confirm
   against the full source. */
7431 static SLJIT_INLINE BOOL
assert_needs_str_ptr_saving(pcre_uchar
*cc
)
7437 case OP_NOT_WORD_BOUNDARY
:
7438 case OP_WORD_BOUNDARY
:
/* Skip the current opcode using its fixed length from the opcode table. */
7445 cc
+= PRIV(OP_lengths
)[*cc
];
7457 static pcre_uchar
*compile_assert_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, assert_backtrack
*backtrack
, BOOL conditional
)
7462 BOOL needs_control_head
;
7463 int private_data_ptr
;
7464 backtrack_common altbacktrack
;
7465 pcre_uchar
*ccbegin
;
7467 pcre_uchar bra
= OP_BRA
;
7468 jump_list
*tmp
= NULL
;
7469 jump_list
**target
= (conditional
) ? &backtrack
->condfailed
: &backtrack
->common
.topbacktracks
;
7471 /* Saving previous accept variables. */
7472 BOOL save_local_exit
= common
->local_exit
;
7473 BOOL save_positive_assert
= common
->positive_assert
;
7474 then_trap_backtrack
*save_then_trap
= common
->then_trap
;
7475 struct sljit_label
*save_quit_label
= common
->quit_label
;
7476 struct sljit_label
*save_accept_label
= common
->accept_label
;
7477 jump_list
*save_quit
= common
->quit
;
7478 jump_list
*save_positive_assert_quit
= common
->positive_assert_quit
;
7479 jump_list
*save_accept
= common
->accept
;
7480 struct sljit_jump
*jump
;
7481 struct sljit_jump
*brajump
= NULL
;
7483 /* Assert captures then. */
7484 common
->then_trap
= NULL
;
7486 if (*cc
== OP_BRAZERO
|| *cc
== OP_BRAMINZERO
)
7488 SLJIT_ASSERT(!conditional
);
7492 private_data_ptr
= PRIVATE_DATA(cc
);
7493 SLJIT_ASSERT(private_data_ptr
!= 0);
7494 framesize
= get_framesize(common
, cc
, NULL
, FALSE
, &needs_control_head
);
7495 backtrack
->framesize
= framesize
;
7496 backtrack
->private_data_ptr
= private_data_ptr
;
7498 SLJIT_ASSERT(opcode
>= OP_ASSERT
&& opcode
<= OP_ASSERTBACK_NOT
);
7499 found
= (opcode
== OP_ASSERT
|| opcode
== OP_ASSERTBACK
) ? &tmp
: target
;
7503 if (bra
== OP_BRAMINZERO
)
7505 /* This is a braminzero backtrack path. */
7506 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
7507 free_stack(common
, 1);
7508 brajump
= CMP(SLJIT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0);
7514 if (bra
== OP_BRA
&& !assert_needs_str_ptr_saving(ccbegin
+ 1 + LINK_SIZE
))
7517 if (needs_control_head
)
7520 if (framesize
== no_frame
)
7521 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STACK_TOP
, 0);
7524 allocate_stack(common
, extrasize
);
7526 if (needs_control_head
)
7527 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
7530 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
7532 if (needs_control_head
)
7534 SLJIT_ASSERT(extrasize
== 2);
7535 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_IMM
, 0);
7536 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP1
, 0);
7541 extrasize
= needs_control_head
? 3 : 2;
7542 allocate_stack(common
, framesize
+ extrasize
);
7544 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
7545 OP2(SLJIT_ADD
, TMP2
, 0, STACK_TOP
, 0, SLJIT_IMM
, (framesize
+ extrasize
) * sizeof(sljit_sw
));
7546 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP2
, 0);
7547 if (needs_control_head
)
7548 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
7549 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
7551 if (needs_control_head
)
7553 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(2), TMP1
, 0);
7554 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP2
, 0);
7555 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_IMM
, 0);
7558 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP1
, 0);
7560 init_frame(common
, ccbegin
, NULL
, framesize
+ extrasize
- 1, extrasize
, FALSE
);
7563 memset(&altbacktrack
, 0, sizeof(backtrack_common
));
7564 if (opcode
== OP_ASSERT_NOT
|| opcode
== OP_ASSERTBACK_NOT
)
7566 /* Negative assert is stronger than positive assert. */
7567 common
->local_exit
= TRUE
;
7568 common
->quit_label
= NULL
;
7569 common
->quit
= NULL
;
7570 common
->positive_assert
= FALSE
;
7573 common
->positive_assert
= TRUE
;
7574 common
->positive_assert_quit
= NULL
;
7578 common
->accept_label
= NULL
;
7579 common
->accept
= NULL
;
7580 altbacktrack
.top
= NULL
;
7581 altbacktrack
.topbacktracks
= NULL
;
7583 if (*ccbegin
== OP_ALT
&& extrasize
> 0)
7584 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
7586 altbacktrack
.cc
= ccbegin
;
7587 compile_matchingpath(common
, ccbegin
+ 1 + LINK_SIZE
, cc
, &altbacktrack
);
7588 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
7590 if (opcode
== OP_ASSERT_NOT
|| opcode
== OP_ASSERTBACK_NOT
)
7592 common
->local_exit
= save_local_exit
;
7593 common
->quit_label
= save_quit_label
;
7594 common
->quit
= save_quit
;
7596 common
->positive_assert
= save_positive_assert
;
7597 common
->then_trap
= save_then_trap
;
7598 common
->accept_label
= save_accept_label
;
7599 common
->positive_assert_quit
= save_positive_assert_quit
;
7600 common
->accept
= save_accept
;
7603 common
->accept_label
= LABEL();
7604 if (common
->accept
!= NULL
)
7605 set_jumps(common
->accept
, common
->accept_label
);
7610 if (framesize
== no_frame
)
7611 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
7612 else if (extrasize
> 0)
7613 free_stack(common
, extrasize
);
7615 if (needs_control_head
)
7616 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-1));
7620 if ((opcode
!= OP_ASSERT_NOT
&& opcode
!= OP_ASSERTBACK_NOT
) || conditional
)
7622 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7623 OP2(SLJIT_SUB
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_IMM
, (framesize
+ 1) * sizeof(sljit_sw
));
7624 if (needs_control_head
)
7625 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-1));
7629 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
7630 if (needs_control_head
)
7631 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-framesize
- 2));
7632 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
7636 if (opcode
== OP_ASSERT_NOT
|| opcode
== OP_ASSERTBACK_NOT
)
7638 /* We know that STR_PTR was stored on the top of the stack. */
7642 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), needs_control_head
? STACK(-2) : STACK(-1));
7644 else if (bra
== OP_BRAZERO
)
7647 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-extrasize
));
7650 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-framesize
- 1));
7651 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-framesize
- extrasize
));
7652 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP1
, 0);
7654 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
7655 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7657 else if (framesize
>= 0)
7659 /* For OP_BRA and OP_BRAMINZERO. */
7660 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-framesize
- 1));
7663 add_jump(compiler
, found
, JUMP(SLJIT_JUMP
));
7665 compile_backtrackingpath(common
, altbacktrack
.top
);
7666 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
7668 if (opcode
== OP_ASSERT_NOT
|| opcode
== OP_ASSERTBACK_NOT
)
7670 common
->local_exit
= save_local_exit
;
7671 common
->quit_label
= save_quit_label
;
7672 common
->quit
= save_quit
;
7674 common
->positive_assert
= save_positive_assert
;
7675 common
->then_trap
= save_then_trap
;
7676 common
->accept_label
= save_accept_label
;
7677 common
->positive_assert_quit
= save_positive_assert_quit
;
7678 common
->accept
= save_accept
;
7681 set_jumps(altbacktrack
.topbacktracks
, LABEL());
7690 if (opcode
== OP_ASSERT_NOT
|| opcode
== OP_ASSERTBACK_NOT
)
7692 SLJIT_ASSERT(common
->positive_assert_quit
== NULL
);
7693 /* Makes the check less complicated below. */
7694 common
->positive_assert_quit
= common
->quit
;
7697 /* None of them matched. */
7698 if (common
->positive_assert_quit
!= NULL
)
7700 jump
= JUMP(SLJIT_JUMP
);
7701 set_jumps(common
->positive_assert_quit
, LABEL());
7702 SLJIT_ASSERT(framesize
!= no_stack
);
7704 OP2(SLJIT_SUB
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_IMM
, extrasize
* sizeof(sljit_sw
));
7707 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
7708 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
7709 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, (framesize
+ extrasize
) * sizeof(sljit_sw
));
7714 if (needs_control_head
)
7715 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(1));
7717 if (opcode
== OP_ASSERT
|| opcode
== OP_ASSERTBACK
)
7719 /* Assert is failed. */
7720 if ((conditional
&& extrasize
> 0) || bra
== OP_BRAZERO
)
7721 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
7725 /* The topmost item should be 0. */
7726 if (bra
== OP_BRAZERO
)
7729 free_stack(common
, 1);
7730 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7732 else if (extrasize
> 0)
7733 free_stack(common
, extrasize
);
7737 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(extrasize
- 1));
7738 /* The topmost item should be 0. */
7739 if (bra
== OP_BRAZERO
)
7741 free_stack(common
, framesize
+ extrasize
- 1);
7742 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7745 free_stack(common
, framesize
+ extrasize
);
7746 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP1
, 0);
7748 jump
= JUMP(SLJIT_JUMP
);
7749 if (bra
!= OP_BRAZERO
)
7750 add_jump(compiler
, target
, jump
);
7752 /* Assert is successful. */
7753 set_jumps(tmp
, LABEL());
7756 /* We know that STR_PTR was stored on the top of the stack. */
7758 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-extrasize
));
7760 /* Keep the STR_PTR on the top of the stack. */
7761 if (bra
== OP_BRAZERO
)
7763 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
7765 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
7767 else if (bra
== OP_BRAMINZERO
)
7769 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
7770 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7777 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7778 OP2(SLJIT_SUB
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_IMM
, (framesize
+ 1) * sizeof(sljit_sw
));
7779 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-extrasize
+ 1));
7783 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
7784 OP2(SLJIT_SUB
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_IMM
, (framesize
+ 2) * sizeof(sljit_sw
));
7787 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
7788 if (bra
== OP_BRAMINZERO
)
7789 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7793 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), 0);
7794 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), bra
== OP_BRAZERO
? STR_PTR
: SLJIT_IMM
, 0);
7799 if (bra
== OP_BRAZERO
)
7801 backtrack
->matchingpath
= LABEL();
7802 SET_LABEL(jump
, backtrack
->matchingpath
);
7804 else if (bra
== OP_BRAMINZERO
)
7806 JUMPTO(SLJIT_JUMP
, backtrack
->matchingpath
);
7810 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
7811 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
7812 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-framesize
- 1));
7814 set_jumps(backtrack
->common
.topbacktracks
, LABEL());
7819 /* AssertNot is successful. */
7823 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
7828 free_stack(common
, 1);
7829 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7831 else if (extrasize
> 0)
7832 free_stack(common
, extrasize
);
7836 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
7837 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(extrasize
- 1));
7838 /* The topmost item should be 0. */
7841 free_stack(common
, framesize
+ extrasize
- 1);
7842 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
7845 free_stack(common
, framesize
+ extrasize
);
7846 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP1
, 0);
7849 if (bra
== OP_BRAZERO
)
7850 backtrack
->matchingpath
= LABEL();
7851 else if (bra
== OP_BRAMINZERO
)
7853 JUMPTO(SLJIT_JUMP
, backtrack
->matchingpath
);
7859 SLJIT_ASSERT(found
== &backtrack
->common
.topbacktracks
);
7860 set_jumps(backtrack
->common
.topbacktracks
, LABEL());
7861 backtrack
->common
.topbacktracks
= NULL
;
7865 if (opcode
== OP_ASSERT_NOT
|| opcode
== OP_ASSERTBACK_NOT
)
7867 common
->local_exit
= save_local_exit
;
7868 common
->quit_label
= save_quit_label
;
7869 common
->quit
= save_quit
;
7871 common
->positive_assert
= save_positive_assert
;
7872 common
->then_trap
= save_then_trap
;
7873 common
->accept_label
= save_accept_label
;
7874 common
->positive_assert_quit
= save_positive_assert_quit
;
7875 common
->accept
= save_accept
;
7876 return cc
+ 1 + LINK_SIZE
;
/* Emits the code that runs after the body of an atomic group has matched;
   the only caller visible here invokes it when opcode == OP_ONCE.

   common             - compiler state; common->control_head_ptr is written.
   ket                - closing opcode, compared against OP_KET, OP_KETRMAX
                        and OP_KETRMIN.
   framesize          - compared against no_frame and used to compute how far
                        STACK_TOP lies below the saved frame position.
   private_data_ptr   - offset (relative to SLJIT_SP) of the slot that holds
                        the saved stack position for this group.
   has_alternatives   - together with ket it selects how many stack slots
                        are released and which slot is read back.
   needs_control_head - when set, a saved value is loaded from the stack into
                        TMP1 and finally stored to common->control_head_ptr.

   For OP_KETRMAX the code also loads TMP2 from the stack, and for OP_KETRMIN
   it copies a stack slot into the private_data_ptr slot.

   NOTE(review): this listing omits some structural lines (braces, else
   branches, local declarations), so the exact nesting of the statements
   below cannot be read off here - confirm against the full source. */
7879 static SLJIT_INLINE
void match_once_common(compiler_common
*common
, pcre_uchar ket
, int framesize
, int private_data_ptr
, BOOL has_alternatives
, BOOL needs_control_head
)
7886 if (framesize
== no_frame
)
7887 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
7890 stacksize
= needs_control_head
? 1 : 0;
7891 if (ket
!= OP_KET
|| has_alternatives
)
7895 free_stack(common
, stacksize
);
7898 if (needs_control_head
)
7899 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), (ket
!= OP_KET
|| has_alternatives
) ? STACK(-2) : STACK(-1));
7901 /* TMP2, which is set here, is used by OP_KETRMAX below. */
7902 if (ket
== OP_KETRMAX
)
7903 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-1));
7904 else if (ket
== OP_KETRMIN
)
7906 /* Move the STR_PTR to the private_data_ptr. */
7907 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-1));
7912 stacksize
= (ket
!= OP_KET
|| has_alternatives
) ? 2 : 1;
7913 OP2(SLJIT_SUB
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_IMM
, (framesize
+ stacksize
) * sizeof(sljit_sw
));
7914 if (needs_control_head
)
7915 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-1));
7917 if (ket
== OP_KETRMAX
)
7919 /* TMP2, which is set here, is used by OP_KETRMAX below. */
7920 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
/* Restore the control head from the value loaded into TMP1 above. */
7923 if (needs_control_head
)
7924 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, TMP1
, 0);
/* Emits the code that saves the previous capture state on the stack and
   records the new values for a capturing bracket.

   common           - compiler state; capture_last_ptr and optimized_cbracket
                      are read.
   stacksize        - index of the first stack slot (STACK(stacksize)) that
                      this function may write.
   offset           - OVECTOR index of the capture start; offset + 1 is the
                      capture end, and offset >> 1 is the value stored as the
                      "last capture" and used to index optimized_cbracket.
   private_data_ptr - offset (relative to SLJIT_SP) of the slot whose value
                      becomes the new OVECTOR(offset).

   When common->capture_last_ptr != 0, the old capture_last value is pushed
   and replaced by offset >> 1. When optimized_cbracket[offset >> 1] == 0,
   the old OVECTOR(offset) / OVECTOR(offset + 1) pair is pushed, then
   OVECTOR(offset) is loaded from the private_data_ptr slot and
   OVECTOR(offset + 1) is set to STR_PTR.

   NOTE(review): the function is declared to return int and its caller
   assigns the result back to its own stacksize, but the return statement
   (and any update of stacksize) is not present in this listing - presumably
   it returns the next free slot index; confirm against the full source. */
7927 static SLJIT_INLINE
int match_capture_common(compiler_common
*common
, int stacksize
, int offset
, int private_data_ptr
)
7931 if (common
->capture_last_ptr
!= 0)
7933 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
);
7934 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
, SLJIT_IMM
, offset
>> 1);
7935 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), TMP1
, 0);
7938 if (common
->optimized_cbracket
[offset
>> 1] == 0)
/* Read both old OVECTOR entries before any of them is overwritten. */
7940 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
));
7941 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
7942 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), TMP1
, 0);
7943 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
7944 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
+ 1), TMP2
, 0);
7945 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), STR_PTR
, 0);
7946 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0);
7953 Handling bracketed expressions is probably the most complex part.
7955 Stack layout naming characters:
7956 S - Push the current STR_PTR
7958 A - Push the current STR_PTR. Needed for restoring the STR_PTR
7959 before the next alternative. Not pushed if there are no alternatives.
7960 M - Any values pushed by the current alternative. Can be empty, or anything.
7961 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
7962 L - Push the previous local (pointed by localptr) to the stack
7963 () - optional values stored on the stack
7964 ()* - optional, can be stored multiple times
7966 The following list shows the regular expression templates, their PCRE byte codes
7967 and stack layout supported by pcre-sljit.
7969 (?:) OP_BRA | OP_KET A M
7970 () OP_CBRA | OP_KET C M
7971 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
7972 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
7973 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
7974 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
7975 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
7976 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
7977 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
7978 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
7979 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
7980 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
7981 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
7982 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
7983 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
7984 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
7985 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
7986 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
7987 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
7988 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
7989 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
7990 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
7993 Stack layout naming characters:
7994 A - Push the alternative index (starting from 0) on the stack.
7995 Not pushed if there are no alternatives.
7996 M - Any values pushed by the current alternative. Can be empty, or anything.
7998 The next list shows the possible content of a bracket:
7999 (|) OP_*BRA | OP_ALT ... M A
8000 (?()|) OP_*COND | OP_ALT M A
8001 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
8002 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
8003 Or nothing, if trace is unnecessary
/* Generates the matching-path code for a bracketed group that starts at cc.

   The group may be preceded by OP_BRAZERO or OP_BRAMINZERO and is closed by
   OP_KET, OP_KETRMAX or OP_KETRMIN. The group opcodes this function compares
   against include OP_CBRA, OP_SCBRA, OP_SBRA, OP_ONCE, OP_ONCE_NC, OP_COND
   and OP_SCOND.

   common - compiler state.
   cc     - first opcode of the (possibly BRAZERO/BRAMINZERO-prefixed) group.
   parent - backtrack context; a bracket_backtrack is pushed for this group,
            and parent->top is reset to backtrack->prev when the group is
            dropped (a condition whose test is OP_DEF).

   Returns the position where compilation continues: cc + repeat_length once
   cc has been advanced past the remaining OP_ALT branches, or
   matchingpath + 1 + LINK_SIZE + repeat_length for the dropped OP_DEF case.
   For OP_ONCE the stored framesize is re-encoded as
   (framesize << 1) | needs_control_head just before returning.

   NOTE(review): this listing omits some structural lines (braces, else
   branches, several declarations and the early returns after compiler
   errors), so the nesting of the statements below cannot be read off here -
   confirm against the full source. */
8006 static pcre_uchar
*compile_bracket_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
8009 backtrack_common
*backtrack
;
8011 int private_data_ptr
= 0;
8014 int repeat_ptr
= 0, repeat_length
= 0;
8015 int repeat_type
= 0, repeat_count
= 0;
8016 pcre_uchar
*ccbegin
;
8017 pcre_uchar
*matchingpath
;
8019 pcre_uchar bra
= OP_BRA
;
8021 assert_backtrack
*assert;
8022 BOOL has_alternatives
;
8023 BOOL needs_control_head
= FALSE
;
8024 struct sljit_jump
*jump
;
8025 struct sljit_jump
*skip
;
8026 struct sljit_label
*rmax_label
= NULL
;
8027 struct sljit_jump
*braminzero
= NULL
;
8029 PUSH_BACKTRACK(sizeof(bracket_backtrack
), cc
, NULL
);
8031 if (*cc
== OP_BRAZERO
|| *cc
== OP_BRAMINZERO
)
/* The closing ket opcode is read 1 + LINK_SIZE units before bracketend(cc). */
8040 matchingpath
= bracketend(cc
) - 1 - LINK_SIZE
;
8041 ket
= *matchingpath
;
/* A plain OP_KET with non-zero private data carries the repeat information
   (pointer, length, type and count) read below. */
8042 if (ket
== OP_KET
&& PRIVATE_DATA(matchingpath
) != 0)
8044 repeat_ptr
= PRIVATE_DATA(matchingpath
);
8045 repeat_length
= PRIVATE_DATA(matchingpath
+ 1);
8046 repeat_type
= PRIVATE_DATA(matchingpath
+ 2);
8047 repeat_count
= PRIVATE_DATA(matchingpath
+ 3);
8048 SLJIT_ASSERT(repeat_length
!= 0 && repeat_type
!= 0 && repeat_count
!= 0);
8049 if (repeat_type
== OP_UPTO
)
8051 if (repeat_type
== OP_MINUPTO
)
8055 if ((opcode
== OP_COND
|| opcode
== OP_SCOND
) && cc
[1 + LINK_SIZE
] == OP_DEF
)
8057 /* Drop this bracket_backtrack. */
8058 parent
->top
= backtrack
->prev
;
8059 return matchingpath
+ 1 + LINK_SIZE
+ repeat_length
;
8062 matchingpath
= ccbegin
+ 1 + LINK_SIZE
;
8063 SLJIT_ASSERT(ket
== OP_KET
|| ket
== OP_KETRMAX
|| ket
== OP_KETRMIN
);
8064 SLJIT_ASSERT(!((bra
== OP_BRAZERO
&& ket
== OP_KETRMIN
) || (bra
== OP_BRAMINZERO
&& ket
== OP_KETRMAX
)));
8067 has_alternatives
= *cc
== OP_ALT
;
8068 if (SLJIT_UNLIKELY(opcode
== OP_COND
|| opcode
== OP_SCOND
))
8069 has_alternatives
= (*matchingpath
== OP_RREF
|| *matchingpath
== OP_DNRREF
|| *matchingpath
== OP_FAIL
) ? FALSE
: TRUE
;
8071 if (SLJIT_UNLIKELY(opcode
== OP_COND
) && (*cc
== OP_KETRMAX
|| *cc
== OP_KETRMIN
))
8073 if (SLJIT_UNLIKELY(opcode
== OP_ONCE_NC
))
8076 if (opcode
== OP_CBRA
|| opcode
== OP_SCBRA
)
8078 /* Capturing brackets have a pre-allocated space. */
8079 offset
= GET2(ccbegin
, 1 + LINK_SIZE
);
8080 if (common
->optimized_cbracket
[offset
] == 0)
8082 private_data_ptr
= OVECTOR_PRIV(offset
);
8088 private_data_ptr
= OVECTOR(offset
);
8090 BACKTRACK_AS(bracket_backtrack
)->private_data_ptr
= private_data_ptr
;
8091 matchingpath
+= IMM2_SIZE
;
8093 else if (opcode
== OP_ONCE
|| opcode
== OP_SBRA
|| opcode
== OP_SCOND
)
8095 /* Other brackets simply allocate the next entry. */
8096 private_data_ptr
= PRIVATE_DATA(ccbegin
);
8097 SLJIT_ASSERT(private_data_ptr
!= 0);
8098 BACKTRACK_AS(bracket_backtrack
)->private_data_ptr
= private_data_ptr
;
8099 if (opcode
== OP_ONCE
)
8100 BACKTRACK_AS(bracket_backtrack
)->u
.framesize
= get_framesize(common
, ccbegin
, NULL
, FALSE
, &needs_control_head
);
8103 /* Instructions before the first alternative. */
8105 if (ket
== OP_KETRMAX
|| (ket
== OP_KETRMIN
&& bra
!= OP_BRAMINZERO
))
8107 if (bra
== OP_BRAZERO
)
8111 allocate_stack(common
, stacksize
);
8114 if (ket
== OP_KETRMAX
|| (ket
== OP_KETRMIN
&& bra
!= OP_BRAMINZERO
))
8116 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), SLJIT_IMM
, 0);
8120 if (bra
== OP_BRAZERO
)
8121 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), STR_PTR
, 0);
8123 if (bra
== OP_BRAMINZERO
)
8125 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
8126 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
8127 if (ket
!= OP_KETRMIN
)
8129 free_stack(common
, 1);
8130 braminzero
= CMP(SLJIT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0);
8134 if (opcode
== OP_ONCE
|| opcode
>= OP_SBRA
)
8136 jump
= CMP(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0);
8137 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
8138 /* Nothing stored during the first run. */
8139 skip
= JUMP(SLJIT_JUMP
);
8141 /* Checking zero-length iteration. */
8142 if (opcode
!= OP_ONCE
|| BACKTRACK_AS(bracket_backtrack
)->u
.framesize
< 0)
8144 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
8145 braminzero
= CMP(SLJIT_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8149 /* Except when the whole stack frame must be saved. */
8150 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8151 braminzero
= CMP(SLJIT_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(TMP1
), STACK(-BACKTRACK_AS(bracket_backtrack
)->u
.framesize
- 2));
8157 jump
= CMP(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0);
8158 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
8164 if (repeat_type
!= 0)
8166 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_IMM
, repeat_count
);
8167 if (repeat_type
== OP_EXACT
)
8168 rmax_label
= LABEL();
8171 if (ket
== OP_KETRMIN
)
8172 BACKTRACK_AS(bracket_backtrack
)->recursive_matchingpath
= LABEL();
8174 if (ket
== OP_KETRMAX
)
8176 rmax_label
= LABEL();
8177 if (has_alternatives
&& opcode
!= OP_ONCE
&& opcode
< OP_SBRA
&& repeat_type
== 0)
8178 BACKTRACK_AS(bracket_backtrack
)->alternative_matchingpath
= rmax_label
;
8181 /* Handling capturing brackets and alternatives. */
8182 if (opcode
== OP_ONCE
)
8185 if (needs_control_head
)
8187 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
8191 if (BACKTRACK_AS(bracket_backtrack
)->u
.framesize
< 0)
8193 /* Neither capturing brackets nor recursions are found in the block. */
8194 if (ket
== OP_KETRMIN
)
8197 if (!needs_control_head
)
8198 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8202 if (BACKTRACK_AS(bracket_backtrack
)->u
.framesize
== no_frame
)
8203 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STACK_TOP
, 0);
8204 if (ket
== OP_KETRMAX
|| has_alternatives
)
8209 allocate_stack(common
, stacksize
);
8212 if (needs_control_head
)
8215 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
8218 if (ket
== OP_KETRMIN
)
8220 if (needs_control_head
)
8221 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8222 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), STR_PTR
, 0);
8223 if (BACKTRACK_AS(bracket_backtrack
)->u
.framesize
== no_frame
)
8224 OP2(SLJIT_ADD
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STACK_TOP
, 0, SLJIT_IMM
, needs_control_head
? (2 * sizeof(sljit_sw
)) : sizeof(sljit_sw
));
8225 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
+ 1), TMP2
, 0);
8227 else if (ket
== OP_KETRMAX
|| has_alternatives
)
8228 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), STR_PTR
, 0);
8232 if (ket
!= OP_KET
|| has_alternatives
)
8235 stacksize
+= BACKTRACK_AS(bracket_backtrack
)->u
.framesize
+ 1;
8236 allocate_stack(common
, stacksize
);
8238 if (needs_control_head
)
8239 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
8241 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8242 OP2(SLJIT_ADD
, TMP2
, 0, STACK_TOP
, 0, SLJIT_IMM
, stacksize
* sizeof(sljit_sw
));
8244 stacksize
= needs_control_head
? 1 : 0;
8245 if (ket
!= OP_KET
|| has_alternatives
)
8247 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), STR_PTR
, 0);
8248 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP2
, 0);
8250 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), TMP1
, 0);
8254 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP2
, 0);
8255 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), TMP1
, 0);
8257 init_frame(common
, ccbegin
, NULL
, BACKTRACK_AS(bracket_backtrack
)->u
.framesize
+ stacksize
, stacksize
+ 1, FALSE
);
8260 else if (opcode
== OP_CBRA
|| opcode
== OP_SCBRA
)
8262 /* Saving the previous values. */
8263 if (common
->optimized_cbracket
[offset
>> 1] != 0)
8265 SLJIT_ASSERT(private_data_ptr
== OVECTOR(offset
));
8266 allocate_stack(common
, 2);
8267 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8268 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
+ sizeof(sljit_sw
));
8269 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STR_PTR
, 0);
8270 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP1
, 0);
8271 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP2
, 0);
8275 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8276 allocate_stack(common
, 1);
8277 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STR_PTR
, 0);
8278 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
8281 else if (opcode
== OP_SBRA
|| opcode
== OP_SCOND
)
8283 /* Saving the previous value. */
8284 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8285 allocate_stack(common
, 1);
8286 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STR_PTR
, 0);
8287 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
8289 else if (has_alternatives
)
8291 /* Pushing the starting string pointer. */
8292 allocate_stack(common
, 1);
8293 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
8296 /* Generating code for the first alternative. */
8297 if (opcode
== OP_COND
|| opcode
== OP_SCOND
)
8299 if (*matchingpath
== OP_CREF
)
8301 SLJIT_ASSERT(has_alternatives
);
8302 add_jump(compiler
, &(BACKTRACK_AS(bracket_backtrack
)->u
.condfailed
),
8303 CMP(SLJIT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(GET2(matchingpath
, 1) << 1), SLJIT_MEM1(SLJIT_SP
), OVECTOR(1)));
8304 matchingpath
+= 1 + IMM2_SIZE
;
8306 else if (*matchingpath
== OP_DNCREF
)
8308 SLJIT_ASSERT(has_alternatives
);
8310 i
= GET2(matchingpath
, 1 + IMM2_SIZE
);
8311 slot
= common
->name_table
+ GET2(matchingpath
, 1) * common
->name_entry_size
;
8312 OP1(SLJIT_MOV
, TMP3
, 0, STR_PTR
, 0);
8313 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(1));
8314 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(GET2(slot
, 0) << 1), TMP1
, 0);
8315 slot
+= common
->name_entry_size
;
8319 OP2(SLJIT_SUB
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(GET2(slot
, 0) << 1), TMP1
, 0);
8320 OP2(SLJIT_OR
| SLJIT_SET_Z
, TMP2
, 0, TMP2
, 0, STR_PTR
, 0);
8321 slot
+= common
->name_entry_size
;
8323 OP1(SLJIT_MOV
, STR_PTR
, 0, TMP3
, 0);
8324 add_jump(compiler
, &(BACKTRACK_AS(bracket_backtrack
)->u
.condfailed
), JUMP(SLJIT_ZERO
));
8325 matchingpath
+= 1 + 2 * IMM2_SIZE
;
8327 else if (*matchingpath
== OP_RREF
|| *matchingpath
== OP_DNRREF
|| *matchingpath
== OP_FAIL
)
8329 /* Never has other case. */
8330 BACKTRACK_AS(bracket_backtrack
)->u
.condfailed
= NULL
;
8331 SLJIT_ASSERT(!has_alternatives
);
8333 if (*matchingpath
== OP_FAIL
)
8335 else if (*matchingpath
== OP_RREF
)
8337 stacksize
= GET2(matchingpath
, 1);
8338 if (common
->currententry
== NULL
)
8340 else if (stacksize
== RREF_ANY
)
8342 else if (common
->currententry
->start
== 0)
8343 stacksize
= stacksize
== 0;
8345 stacksize
= stacksize
== (int)GET2(common
->start
, common
->currententry
->start
+ 1 + LINK_SIZE
);
8348 matchingpath
+= 1 + IMM2_SIZE
;
8352 if (common
->currententry
== NULL
|| common
->currententry
->start
== 0)
8356 stacksize
= GET2(matchingpath
, 1 + IMM2_SIZE
);
8357 slot
= common
->name_table
+ GET2(matchingpath
, 1) * common
->name_entry_size
;
8358 i
= (int)GET2(common
->start
, common
->currententry
->start
+ 1 + LINK_SIZE
);
8359 while (stacksize
> 0)
8361 if ((int)GET2(slot
, 0) == i
)
8363 slot
+= common
->name_entry_size
;
8369 matchingpath
+= 1 + 2 * IMM2_SIZE
;
8372 /* The stacksize == 0 is a common "else" case. */
8377 matchingpath
= cc
+ 1 + LINK_SIZE
;
8386 SLJIT_ASSERT(has_alternatives
&& *matchingpath
>= OP_ASSERT
&& *matchingpath
<= OP_ASSERTBACK_NOT
);
8387 /* Similar code as PUSH_BACKTRACK macro. */
8388 assert = sljit_alloc_memory(compiler
, sizeof(assert_backtrack
));
8389 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
8391 memset(assert, 0, sizeof(assert_backtrack
));
8392 assert->common
.cc
= matchingpath
;
8393 BACKTRACK_AS(bracket_backtrack
)->u
.assert = assert;
8394 matchingpath
= compile_assert_matchingpath(common
, matchingpath
, assert, TRUE
);
8398 compile_matchingpath(common
, matchingpath
, cc
, backtrack
);
8399 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
8402 if (opcode
== OP_ONCE
)
8403 match_once_common(common
, ket
, BACKTRACK_AS(bracket_backtrack
)->u
.framesize
, private_data_ptr
, has_alternatives
, needs_control_head
);
8406 if (repeat_type
== OP_MINUPTO
)
8408 /* We need to preserve the counter. TMP2 will be used below. */
8409 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
);
8412 if (ket
!= OP_KET
|| bra
!= OP_BRA
)
8416 if (common
->capture_last_ptr
!= 0)
8418 if (common
->optimized_cbracket
[offset
>> 1] == 0)
8421 if (has_alternatives
&& opcode
!= OP_ONCE
)
8425 allocate_stack(common
, stacksize
);
8428 if (repeat_type
== OP_MINUPTO
)
8430 /* TMP2 was set above. */
8431 OP2(SLJIT_SUB
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), TMP2
, 0, SLJIT_IMM
, 1);
8435 if (ket
!= OP_KET
|| bra
!= OP_BRA
)
8438 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), STR_PTR
, 0);
8440 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), SLJIT_IMM
, 0);
8445 stacksize
= match_capture_common(common
, stacksize
, offset
, private_data_ptr
);
8447 if (has_alternatives
)
8449 if (opcode
!= OP_ONCE
)
8450 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), SLJIT_IMM
, 0);
8451 if (ket
!= OP_KETRMAX
)
8452 BACKTRACK_AS(bracket_backtrack
)->alternative_matchingpath
= LABEL();
8455 /* Must be after the matchingpath label. */
8456 if (offset
!= 0 && common
->optimized_cbracket
[offset
>> 1] != 0)
8458 SLJIT_ASSERT(private_data_ptr
== OVECTOR(offset
+ 0));
8459 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), STR_PTR
, 0);
8462 if (ket
== OP_KETRMAX
)
8464 if (repeat_type
!= 0)
8466 if (has_alternatives
)
8467 BACKTRACK_AS(bracket_backtrack
)->alternative_matchingpath
= LABEL();
8468 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_IMM
, 1);
8469 JUMPTO(SLJIT_NOT_ZERO
, rmax_label
);
8470 /* Drop STR_PTR for greedy plus quantifier. */
8471 if (opcode
!= OP_ONCE
)
8472 free_stack(common
, 1);
8474 else if (opcode
== OP_ONCE
|| opcode
>= OP_SBRA
)
8476 if (has_alternatives
)
8477 BACKTRACK_AS(bracket_backtrack
)->alternative_matchingpath
= LABEL();
8478 /* Checking zero-length iteration. */
8479 if (opcode
!= OP_ONCE
)
8481 CMPTO(SLJIT_NOT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STR_PTR
, 0, rmax_label
);
8482 /* Drop STR_PTR for greedy plus quantifier. */
8483 if (bra
!= OP_BRAZERO
)
8484 free_stack(common
, 1);
8487 /* TMP2 must contain the starting STR_PTR. */
8488 CMPTO(SLJIT_NOT_EQUAL
, TMP2
, 0, STR_PTR
, 0, rmax_label
);
8491 JUMPTO(SLJIT_JUMP
, rmax_label
);
8492 BACKTRACK_AS(bracket_backtrack
)->recursive_matchingpath
= LABEL();
8495 if (repeat_type
== OP_EXACT
)
8497 count_match(common
);
8498 OP2(SLJIT_SUB
| SLJIT_SET_Z
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_IMM
, 1);
8499 JUMPTO(SLJIT_NOT_ZERO
, rmax_label
);
8501 else if (repeat_type
== OP_UPTO
)
8503 /* We need to preserve the counter. */
8504 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
);
8505 allocate_stack(common
, 1);
8506 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
8509 if (bra
== OP_BRAZERO
)
8510 BACKTRACK_AS(bracket_backtrack
)->zero_matchingpath
= LABEL();
8512 if (bra
== OP_BRAMINZERO
)
8514 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
8515 JUMPTO(SLJIT_JUMP
, ((braminzero_backtrack
*)parent
)->matchingpath
);
8516 if (braminzero
!= NULL
)
8518 JUMPHERE(braminzero
);
8519 /* We need to release the end pointer to perform the
8520 backtrack for the zero-length iteration. When
8521 framesize is < 0, OP_ONCE will do the release itself. */
8522 if (opcode
== OP_ONCE
&& BACKTRACK_AS(bracket_backtrack
)->u
.framesize
>= 0)
8524 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8525 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
8527 else if (ket
== OP_KETRMIN
&& opcode
!= OP_ONCE
)
8528 free_stack(common
, 1);
8530 /* Continue to the normal backtrack. */
8533 if ((ket
!= OP_KET
&& bra
!= OP_BRAMINZERO
) || bra
== OP_BRAZERO
)
8534 count_match(common
);
8536 /* Skip the other alternatives. */
8537 while (*cc
== OP_ALT
)
8539 cc
+= 1 + LINK_SIZE
;
8541 if (opcode
== OP_ONCE
)
8543 /* We temporarily encode the needs_control_head in the lowest bit.
8544 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
8545 the same value for small signed numbers (including negative numbers). */
8546 BACKTRACK_AS(bracket_backtrack
)->u
.framesize
= ((unsigned int)BACKTRACK_AS(bracket_backtrack
)->u
.framesize
<< 1) | (needs_control_head
? 1 : 0);
8548 return cc
+ repeat_length
;
8551 static pcre_uchar
*compile_bracketpos_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
8554 backtrack_common
*backtrack
;
8556 int private_data_ptr
;
8557 int cbraprivptr
= 0;
8558 BOOL needs_control_head
;
8563 pcre_uchar
*ccbegin
= NULL
;
8564 int stack
; /* Also contains the offset of control head. */
8565 struct sljit_label
*loop
= NULL
;
8566 struct jump_list
*emptymatch
= NULL
;
8568 PUSH_BACKTRACK(sizeof(bracketpos_backtrack
), cc
, NULL
);
8569 if (*cc
== OP_BRAPOSZERO
)
8576 private_data_ptr
= PRIVATE_DATA(cc
);
8577 SLJIT_ASSERT(private_data_ptr
!= 0);
8578 BACKTRACK_AS(bracketpos_backtrack
)->private_data_ptr
= private_data_ptr
;
8583 ccbegin
= cc
+ 1 + LINK_SIZE
;
8588 offset
= GET2(cc
, 1 + LINK_SIZE
);
8589 /* This case cannot be optimized in the same way as
8590 normal capturing brackets. */
8591 SLJIT_ASSERT(common
->optimized_cbracket
[offset
] == 0);
8592 cbraprivptr
= OVECTOR_PRIV(offset
);
8594 ccbegin
= cc
+ 1 + LINK_SIZE
+ IMM2_SIZE
;
8598 SLJIT_UNREACHABLE();
8602 framesize
= get_framesize(common
, cc
, NULL
, FALSE
, &needs_control_head
);
8603 BACKTRACK_AS(bracketpos_backtrack
)->framesize
= framesize
;
8609 if (common
->capture_last_ptr
!= 0)
8615 if (needs_control_head
)
8620 BACKTRACK_AS(bracketpos_backtrack
)->stacksize
= stacksize
;
8621 allocate_stack(common
, stacksize
);
8622 if (framesize
== no_frame
)
8623 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STACK_TOP
, 0);
8629 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
));
8630 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1));
8631 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP1
, 0);
8632 if (common
->capture_last_ptr
!= 0)
8633 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
);
8634 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP2
, 0);
8635 if (needs_control_head
)
8636 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
8637 if (common
->capture_last_ptr
!= 0)
8639 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(2), TMP1
, 0);
8645 if (needs_control_head
)
8646 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
8647 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
8651 if (needs_control_head
)
8654 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stack
), SLJIT_IMM
, 1);
8655 if (needs_control_head
)
8658 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stack
), TMP2
, 0);
8663 stacksize
= framesize
+ 1;
8666 if (needs_control_head
)
8670 BACKTRACK_AS(bracketpos_backtrack
)->stacksize
= stacksize
;
8672 allocate_stack(common
, stacksize
);
8673 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8674 if (needs_control_head
)
8675 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
8676 OP2(SLJIT_ADD
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, STACK_TOP
, 0, SLJIT_IMM
, stacksize
* sizeof(sljit_sw
));
8681 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 1);
8684 if (needs_control_head
)
8686 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stack
), TMP2
, 0);
8691 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stack
), STR_PTR
, 0);
8694 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stack
), TMP1
, 0);
8695 init_frame(common
, cc
, NULL
, stacksize
- 1, stacksize
- framesize
, FALSE
);
8696 stack
-= 1 + (offset
== 0);
8700 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), cbraprivptr
, STR_PTR
, 0);
8703 while (*cc
!= OP_KETRPOS
)
8705 backtrack
->top
= NULL
;
8706 backtrack
->topbacktracks
= NULL
;
8709 compile_matchingpath(common
, ccbegin
, cc
, backtrack
);
8710 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
8715 if (framesize
== no_frame
)
8716 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8720 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), cbraprivptr
);
8721 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), STR_PTR
, 0);
8722 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), cbraprivptr
, STR_PTR
, 0);
8723 if (common
->capture_last_ptr
!= 0)
8724 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
, SLJIT_IMM
, offset
>> 1);
8725 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0);
8729 if (opcode
== OP_SBRAPOS
)
8730 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
8731 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
8734 /* Even if the match is empty, we need to reset the control head. */
8735 if (needs_control_head
)
8736 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(stack
));
8738 if (opcode
== OP_SBRAPOS
|| opcode
== OP_SCBRAPOS
)
8739 add_jump(compiler
, &emptymatch
, CMP(SLJIT_EQUAL
, TMP1
, 0, STR_PTR
, 0));
8742 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
- 1), SLJIT_IMM
, 0);
8748 OP2(SLJIT_SUB
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_IMM
, stacksize
* sizeof(sljit_sw
));
8749 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), cbraprivptr
);
8750 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), STR_PTR
, 0);
8751 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), cbraprivptr
, STR_PTR
, 0);
8752 if (common
->capture_last_ptr
!= 0)
8753 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
, SLJIT_IMM
, offset
>> 1);
8754 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0);
8758 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8759 OP2(SLJIT_SUB
, STACK_TOP
, 0, TMP2
, 0, SLJIT_IMM
, stacksize
* sizeof(sljit_sw
));
8760 if (opcode
== OP_SBRAPOS
)
8761 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(TMP2
), STACK(-framesize
- 2));
8762 OP1(SLJIT_MOV
, SLJIT_MEM1(TMP2
), STACK(-framesize
- 2), STR_PTR
, 0);
8765 /* Even if the match is empty, we need to reset the control head. */
8766 if (needs_control_head
)
8767 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(stack
));
8769 if (opcode
== OP_SBRAPOS
|| opcode
== OP_SCBRAPOS
)
8770 add_jump(compiler
, &emptymatch
, CMP(SLJIT_EQUAL
, TMP1
, 0, STR_PTR
, 0));
8775 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
- 1), SLJIT_IMM
, 0);
8777 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
8781 JUMPTO(SLJIT_JUMP
, loop
);
8782 flush_stubs(common
);
8784 compile_backtrackingpath(common
, backtrack
->top
);
8785 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
8787 set_jumps(backtrack
->topbacktracks
, LABEL());
8792 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), cbraprivptr
);
8794 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
8800 /* Last alternative. */
8801 if (*cc
== OP_KETRPOS
)
8802 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8803 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), cbraprivptr
);
8807 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
8808 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(TMP2
), STACK(-framesize
- 2));
8812 if (*cc
== OP_KETRPOS
)
8814 ccbegin
= cc
+ 1 + LINK_SIZE
;
8817 /* We don't have to restore the control head in case of a failed match. */
8819 backtrack
->topbacktracks
= NULL
;
8823 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_NOT_EQUAL
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
- 1), SLJIT_IMM
, 0));
8824 else /* TMP2 is set to [private_data_ptr] above. */
8825 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_NOT_EQUAL
, SLJIT_MEM1(TMP2
), STACK(-stacksize
), SLJIT_IMM
, 0));
8828 /* None of them matched. */
8829 set_jumps(emptymatch
, LABEL());
8830 count_match(common
);
8831 return cc
+ 1 + LINK_SIZE
;
/* Decodes the iterator opcode for the item at cc. The opcode value in
*opcode is rebased so that every repeat family (caseless, negated, type
and character-class repeats) is expressed in terms of the OP_STAR group.
The minimum and maximum repeat counts are written to *exact and *max, and
*end is set to a position following the item. *type is consulted to tell
the class forms apart and to detect OP_END.
NOTE(review): the assignments to *type and the return statement are not
visible in this listing - confirm the exact output contract. */
8834 static SLJIT_INLINE pcre_uchar
*get_iterator_parameters(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*opcode
, pcre_uchar
*type
, sljit_u32
*max
, sljit_u32
*exact
, pcre_uchar
**end
)
/* Each opcode range below is shifted down onto the OP_STAR range. */
8841 if (*opcode
>= OP_STAR
&& *opcode
<= OP_POSUPTO
)
8846 else if (*opcode
>= OP_STARI
&& *opcode
<= OP_POSUPTOI
)
8850 *opcode
-= OP_STARI
- OP_STAR
;
8852 else if (*opcode
>= OP_NOTSTAR
&& *opcode
<= OP_NOTPOSUPTO
)
8856 *opcode
-= OP_NOTSTAR
- OP_STAR
;
8858 else if (*opcode
>= OP_NOTSTARI
&& *opcode
<= OP_NOTPOSUPTOI
)
8862 *opcode
-= OP_NOTSTARI
- OP_STAR
;
8864 else if (*opcode
>= OP_TYPESTAR
&& *opcode
<= OP_TYPEPOSUPTO
)
8867 *opcode
-= OP_TYPESTAR
- OP_STAR
;
8872 SLJIT_ASSERT(*opcode
== OP_CLASS
|| *opcode
== OP_NCLASS
|| *opcode
== OP_XCLASS
);
/* Types below OP_XCLASS use a fixed length of 1 + 32 / sizeof(pcre_uchar);
otherwise the length is read with GET(cc, 0). The repeat opcode is the
unit at cc[class_len - 1]. */
8875 class_len
= (*type
< OP_XCLASS
) ? (int)(1 + (32 / sizeof(pcre_uchar
))) : GET(cc
, 0);
8876 *opcode
= cc
[class_len
- 1];
8878 if (*opcode
>= OP_CRSTAR
&& *opcode
<= OP_CRMINQUERY
)
8880 *opcode
-= OP_CRSTAR
- OP_STAR
;
8881 *end
= cc
+ class_len
;
8883 if (*opcode
== OP_PLUS
|| *opcode
== OP_MINPLUS
)
8886 *opcode
-= OP_PLUS
- OP_STAR
;
8889 else if (*opcode
>= OP_CRPOSSTAR
&& *opcode
<= OP_CRPOSQUERY
)
8891 *opcode
-= OP_CRPOSSTAR
- OP_POSSTAR
;
8892 *end
= cc
+ class_len
;
8894 if (*opcode
== OP_POSPLUS
)
8897 *opcode
= OP_POSSTAR
;
8902 SLJIT_ASSERT(*opcode
== OP_CRRANGE
|| *opcode
== OP_CRMINRANGE
|| *opcode
== OP_CRPOSRANGE
);
/* Range repeats carry two GET2 values: *exact at class_len and *max at
class_len + IMM2_SIZE. */
8903 *max
= GET2(cc
, (class_len
+ IMM2_SIZE
));
8904 *exact
= GET2(cc
, class_len
);
8908 if (*opcode
== OP_CRPOSRANGE
)
8909 *opcode
= OP_POSSTAR
;
8911 *opcode
-= OP_CRRANGE
- OP_STAR
;
8920 if (*opcode
== OP_CRPOSRANGE
)
8921 *opcode
= OP_POSQUERY
;
8923 *opcode
-= OP_CRRANGE
- OP_QUERY
;
8927 if (*opcode
== OP_CRPOSRANGE
)
8928 *opcode
= OP_POSUPTO
;
8930 *opcode
-= OP_CRRANGE
- OP_UPTO
;
8933 *end
= cc
+ class_len
+ 2 * IMM2_SIZE
;
8941 *exact
= GET2(cc
, 0);
8948 *opcode
-= OP_PLUS
- OP_STAR
;
8953 *opcode
= OP_POSSTAR
;
8964 if (*type
== OP_END
)
8967 *end
= next_opcode(common
, cc
);
/* In UTF mode *end is advanced by GET_EXTRALEN(*cc) when HAS_EXTRALEN(*cc)
holds. */
8974 if (common
->utf
&& HAS_EXTRALEN(*cc
)) *end
+= GET_EXTRALEN(*cc
);
/* Emits the matching path for an iterator over a single item. The iterator
is decoded with get_iterator_parameters(); the fixed part (exact) is
emitted first, followed by the variable part selected by the normalized
opcode (OP_STAR, OP_UPTO and OP_QUERY are tested below). A
char_iterator_backtrack record is pushed, and its matchingpath label and
its u.backtracks / u.charpos members are set here.
Working storage is either the stack (STACK(0), STACK(1)) when the item has
no private data slot, or the private data slot and the word after it.
NOTE(review): the switch/case labels separating the opcode variants are not
visible in this listing - confirm which statements belong to which case. */
8979 static pcre_uchar
*compile_iterator_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
8982 backtrack_common
*backtrack
;
8985 sljit_u32 max
= 0, exact
;
8987 sljit_s32 fast_str_ptr
;
8988 BOOL charpos_enabled
;
8989 pcre_uchar charpos_char
;
8990 unsigned int charpos_othercasebit
;
8992 jump_list
*no_match
= NULL
;
8993 jump_list
*no_char1_match
= NULL
;
8994 struct sljit_jump
*jump
= NULL
;
8995 struct sljit_label
*label
;
/* base/offset0/offset1 address the two working words: on the stack when
private_data_ptr is 0, otherwise in the private data area. */
8996 int private_data_ptr
= PRIVATE_DATA(cc
);
8997 int base
= (private_data_ptr
== 0) ? SLJIT_MEM1(STACK_TOP
) : SLJIT_MEM1(SLJIT_SP
);
8998 int offset0
= (private_data_ptr
== 0) ? STACK(0) : private_data_ptr
;
8999 int offset1
= (private_data_ptr
== 0) ? STACK(1) : private_data_ptr
+ (int)sizeof(sljit_sw
);
9000 int tmp_base
, tmp_offset
;
9002 PUSH_BACKTRACK(sizeof(char_iterator_backtrack
), cc
, NULL
);
9004 fast_str_ptr
= PRIVATE_DATA(cc
+ 1);
9007 SLJIT_ASSERT(common
->fast_forward_bc_ptr
== NULL
|| fast_str_ptr
== 0 || cc
== common
->fast_forward_bc_ptr
);
9009 if (cc
== common
->fast_forward_bc_ptr
)
9011 else if (common
->fast_fail_start_ptr
== 0)
9014 SLJIT_ASSERT(common
->fast_forward_bc_ptr
!= NULL
|| fast_str_ptr
== 0
9015 || (fast_str_ptr
>= common
->fast_fail_start_ptr
&& fast_str_ptr
<= common
->fast_fail_end_ptr
));
9017 cc
= get_iterator_parameters(common
, cc
, &opcode
, &type
, &max
, &exact
, &end
);
9019 if (type
!= OP_EXTUNI
)
9026 tmp_base
= SLJIT_MEM1(SLJIT_SP
);
9027 tmp_offset
= POSSESSIVE0
;
/* Fail at once when STR_PTR is not beyond the position saved in the
fast_str_ptr slot. */
9030 if (fast_fail
&& fast_str_ptr
!= 0)
9031 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
));
9033 /* Handle fixed part first. */
9036 SLJIT_ASSERT(fast_str_ptr
== 0);
9037 if (common
->mode
== JIT_COMPILE
9041 && type
!= OP_ANYNL
&& type
!= OP_EXTUNI
)
/* Check once that `exact` units are available before STR_END, then run a
counted loop over the item; the counter lives in tmp_base/tmp_offset. */
9043 OP2(SLJIT_ADD
, TMP1
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(exact
));
9044 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_GREATER
, TMP1
, 0, STR_END
, 0));
9045 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, SLJIT_IMM
, exact
);
9047 compile_char1_matchingpath(common
, type
, cc
, &backtrack
->topbacktracks
, FALSE
);
9048 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9049 JUMPTO(SLJIT_NOT_ZERO
, label
);
9053 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, SLJIT_IMM
, exact
);
9055 compile_char1_matchingpath(common
, type
, cc
, &backtrack
->topbacktracks
, TRUE
);
9056 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9057 JUMPTO(SLJIT_NOT_ZERO
, label
);
9060 else if (exact
== 1)
9061 compile_char1_matchingpath(common
, type
, cc
, &backtrack
->topbacktracks
, TRUE
);
9067 SLJIT_ASSERT(fast_str_ptr
== 0 || opcode
== OP_STAR
);
/* OP_ANYNL and OP_EXTUNI items: every matched position is pushed on the
stack, starting with the initial STR_PTR and a zero word. */
9069 if (type
== OP_ANYNL
|| type
== OP_EXTUNI
)
9071 SLJIT_ASSERT(private_data_ptr
== 0);
9072 SLJIT_ASSERT(fast_str_ptr
== 0);
9074 allocate_stack(common
, 2);
9075 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
9076 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), SLJIT_IMM
, 0);
9078 if (opcode
== OP_UPTO
)
9079 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE0
, SLJIT_IMM
, max
);
9082 compile_char1_matchingpath(common
, type
, cc
, &BACKTRACK_AS(char_iterator_backtrack
)->u
.backtracks
, TRUE
);
9083 if (opcode
== OP_UPTO
)
9085 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE0
);
9086 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
9087 jump
= JUMP(SLJIT_ZERO
);
9088 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE0
, TMP1
, 0);
9091 /* We cannot use TMP3 because of this allocate_stack. */
9092 allocate_stack(common
, 1);
9093 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
9094 JUMPTO(SLJIT_JUMP
, label
);
/* The charpos variant is considered when the iterated item is not a
literal character and the item at `end` is OP_CHAR / OP_CHARI. */
9100 charpos_enabled
= FALSE
;
9102 charpos_othercasebit
= 0;
9104 if ((type
!= OP_CHAR
&& type
!= OP_CHARI
) && (*end
== OP_CHAR
|| *end
== OP_CHARI
))
9106 charpos_enabled
= TRUE
;
9108 charpos_enabled
= !common
->utf
|| !HAS_EXTRALEN(end
[1]);
9110 if (charpos_enabled
&& *end
== OP_CHARI
&& char_has_othercase(common
, end
+ 1))
9112 charpos_othercasebit
= char_get_othercase_bit(common
, end
+ 1);
9113 if (charpos_othercasebit
== 0)
9114 charpos_enabled
= FALSE
;
9117 if (charpos_enabled
)
9119 charpos_char
= end
[1];
9120 /* Consume the OP_CHAR opcode. */
9122 #if defined COMPILE_PCRE8
9123 SLJIT_ASSERT((charpos_othercasebit
>> 8) == 0);
9124 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
9125 SLJIT_ASSERT((charpos_othercasebit
>> 9) == 0);
9126 if ((charpos_othercasebit
& 0x100) != 0)
9127 charpos_othercasebit
= (charpos_othercasebit
& 0xff) << 8;
9129 if (charpos_othercasebit
!= 0)
9130 charpos_char
|= charpos_othercasebit
;
/* Record the charpos parameters in the backtrack record. */
9132 BACKTRACK_AS(char_iterator_backtrack
)->u
.charpos
.enabled
= TRUE
;
9133 BACKTRACK_AS(char_iterator_backtrack
)->u
.charpos
.chr
= charpos_char
;
9134 BACKTRACK_AS(char_iterator_backtrack
)->u
.charpos
.othercasebit
= charpos_othercasebit
;
9138 if (charpos_enabled
)
9140 if (opcode
== OP_UPTO
)
9141 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, SLJIT_IMM
, max
+ 1);
9143 /* Search the first instance of charpos_char. */
9144 jump
= JUMP(SLJIT_JUMP
);
9146 if (opcode
== OP_UPTO
)
9148 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9149 add_jump(compiler
, &backtrack
->topbacktracks
, JUMP(SLJIT_ZERO
));
9151 compile_char1_matchingpath(common
, type
, cc
, &backtrack
->topbacktracks
, FALSE
);
9152 if (fast_str_ptr
!= 0)
9153 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
, STR_PTR
, 0);
9156 detect_partial_match(common
, &backtrack
->topbacktracks
);
9157 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
9158 if (charpos_othercasebit
!= 0)
9159 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, charpos_othercasebit
);
9160 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, charpos_char
, label
);
9162 if (private_data_ptr
== 0)
9163 allocate_stack(common
, 2);
9164 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9165 OP1(SLJIT_MOV
, base
, offset1
, STR_PTR
, 0);
9166 if (opcode
== OP_UPTO
)
9168 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9169 add_jump(compiler
, &no_match
, JUMP(SLJIT_ZERO
));
9172 /* Search the last instance of charpos_char. */
9174 compile_char1_matchingpath(common
, type
, cc
, &no_match
, FALSE
);
9175 if (fast_str_ptr
!= 0)
9176 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
, STR_PTR
, 0);
9177 detect_partial_match(common
, &no_match
);
9178 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(0));
9179 if (charpos_othercasebit
!= 0)
9180 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, charpos_othercasebit
);
9181 if (opcode
== OP_STAR
)
9183 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, charpos_char
, label
);
9184 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9188 jump
= CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, charpos_char
);
9189 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9193 if (opcode
== OP_UPTO
)
9195 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9196 JUMPTO(SLJIT_NOT_ZERO
, label
);
9199 JUMPTO(SLJIT_JUMP
, label
);
/* Resume one unit after the position stored in base/offset0 and store the
advanced position back. */
9201 set_jumps(no_match
, LABEL());
9202 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9203 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
9204 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9206 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9207 else if (common
->utf
)
9209 if (private_data_ptr
== 0)
9210 allocate_stack(common
, 2);
9212 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9213 OP1(SLJIT_MOV
, base
, offset1
, STR_PTR
, 0);
9215 if (opcode
== OP_UPTO
)
9216 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, SLJIT_IMM
, max
);
9219 compile_char1_matchingpath(common
, type
, cc
, &no_match
, TRUE
);
9220 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9222 if (opcode
== OP_UPTO
)
9224 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9225 JUMPTO(SLJIT_NOT_ZERO
, label
);
9228 JUMPTO(SLJIT_JUMP
, label
);
9230 set_jumps(no_match
, LABEL());
9231 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9232 if (fast_str_ptr
!= 0)
9233 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
, STR_PTR
, 0);
9238 if (private_data_ptr
== 0)
9239 allocate_stack(common
, 2);
9241 OP1(SLJIT_MOV
, base
, offset1
, STR_PTR
, 0);
9242 if (opcode
== OP_UPTO
)
9243 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, SLJIT_IMM
, max
);
9246 detect_partial_match(common
, &no_match
);
9247 compile_char1_matchingpath(common
, type
, cc
, &no_char1_match
, FALSE
);
9248 if (opcode
== OP_UPTO
)
9250 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9251 JUMPTO(SLJIT_NOT_ZERO
, label
);
9252 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
9255 JUMPTO(SLJIT_JUMP
, label
);
/* A failed single-item match steps STR_PTR back by one unit before the end
position is stored in base/offset0. */
9257 set_jumps(no_char1_match
, LABEL());
9258 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
9259 set_jumps(no_match
, LABEL());
9260 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9261 if (fast_str_ptr
!= 0)
9262 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
, STR_PTR
, 0);
9265 BACKTRACK_AS(char_iterator_backtrack
)->matchingpath
= LABEL();
9269 if (private_data_ptr
== 0)
9270 allocate_stack(common
, 1);
9271 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9272 BACKTRACK_AS(char_iterator_backtrack
)->matchingpath
= LABEL();
9273 if (fast_str_ptr
!= 0)
9274 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
, STR_PTR
, 0);
9278 SLJIT_ASSERT(fast_str_ptr
== 0);
9279 if (private_data_ptr
== 0)
9280 allocate_stack(common
, 2);
9281 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9282 OP1(SLJIT_MOV
, base
, offset1
, SLJIT_IMM
, max
+ 1);
9283 BACKTRACK_AS(char_iterator_backtrack
)->matchingpath
= LABEL();
9288 SLJIT_ASSERT(fast_str_ptr
== 0);
9289 if (private_data_ptr
== 0)
9290 allocate_stack(common
, 1);
9291 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9292 if (opcode
== OP_QUERY
)
9293 compile_char1_matchingpath(common
, type
, cc
, &BACKTRACK_AS(char_iterator_backtrack
)->u
.backtracks
, TRUE
);
9294 BACKTRACK_AS(char_iterator_backtrack
)->matchingpath
= LABEL();
9301 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
/* The loops below keep the last good position in tmp_base/tmp_offset (or
POSSESSIVE1) and restore STR_PTR from it when the item stops matching. */
9304 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, STR_PTR
, 0);
9306 compile_char1_matchingpath(common
, type
, cc
, &no_match
, TRUE
);
9307 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, STR_PTR
, 0);
9308 JUMPTO(SLJIT_JUMP
, label
);
9309 set_jumps(no_match
, LABEL());
9310 OP1(SLJIT_MOV
, STR_PTR
, 0, tmp_base
, tmp_offset
);
9311 if (fast_str_ptr
!= 0)
9312 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
, STR_PTR
, 0);
9317 detect_partial_match(common
, &no_match
);
9318 compile_char1_matchingpath(common
, type
, cc
, &no_char1_match
, FALSE
);
9319 JUMPTO(SLJIT_JUMP
, label
);
9320 set_jumps(no_char1_match
, LABEL());
9321 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
9322 set_jumps(no_match
, LABEL());
9323 if (fast_str_ptr
!= 0)
9324 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), fast_str_ptr
, STR_PTR
, 0);
9328 SLJIT_ASSERT(fast_str_ptr
== 0);
9329 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
9332 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE1
, STR_PTR
, 0);
9333 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, SLJIT_IMM
, max
);
9335 compile_char1_matchingpath(common
, type
, cc
, &no_match
, TRUE
);
9336 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE1
, STR_PTR
, 0);
9337 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9338 JUMPTO(SLJIT_NOT_ZERO
, label
);
9339 set_jumps(no_match
, LABEL());
9340 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), POSSESSIVE1
);
9344 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, SLJIT_IMM
, max
);
9346 detect_partial_match(common
, &no_match
);
9347 compile_char1_matchingpath(common
, type
, cc
, &no_char1_match
, FALSE
);
9348 OP2(SLJIT_SUB
| SLJIT_SET_Z
, tmp_base
, tmp_offset
, tmp_base
, tmp_offset
, SLJIT_IMM
, 1);
9349 JUMPTO(SLJIT_NOT_ZERO
, label
);
9350 OP2(SLJIT_ADD
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
9351 set_jumps(no_char1_match
, LABEL());
9352 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
9353 set_jumps(no_match
, LABEL());
9357 SLJIT_ASSERT(fast_str_ptr
== 0);
9358 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, STR_PTR
, 0);
9359 compile_char1_matchingpath(common
, type
, cc
, &no_match
, TRUE
);
9360 OP1(SLJIT_MOV
, tmp_base
, tmp_offset
, STR_PTR
, 0);
9361 set_jumps(no_match
, LABEL());
9362 OP1(SLJIT_MOV
, STR_PTR
, 0, tmp_base
, tmp_offset
);
9366 SLJIT_UNREACHABLE();
9370 count_match(common
);
/* Emits the code for the fail/accept opcodes. The visible paths are:
 - an unconditional jump added to backtrack->topbacktracks;
 - a direct jump to the accept point when *cc is OP_ASSERT_ACCEPT, a
   recursion entry is being compiled (common->currententry != NULL) or
   common->might_be_empty is false;
 - otherwise a sequence that accepts when STR_PTR differs from OVECTOR(0),
   and for the remaining case consults the notempty and notempty_atstart
   fields of jit_arguments before accepting or backtracking.
When common->accept_label is still NULL the jump is queued on
common->accept instead of being bound to the label. */
9374 static SLJIT_INLINE pcre_uchar
*compile_fail_accept_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
9377 backtrack_common
*backtrack
;
9379 PUSH_BACKTRACK(sizeof(backtrack_common
), cc
, NULL
);
9383 add_jump(compiler
, &backtrack
->topbacktracks
, JUMP(SLJIT_JUMP
));
9387 if (*cc
== OP_ASSERT_ACCEPT
|| common
->currententry
!= NULL
|| !common
->might_be_empty
)
9389 /* No need to check notempty conditions. */
9390 if (common
->accept_label
== NULL
)
9391 add_jump(compiler
, &common
->accept
, JUMP(SLJIT_JUMP
));
9393 JUMPTO(SLJIT_JUMP
, common
->accept_label
);
/* Accept immediately when STR_PTR differs from OVECTOR(0). */
9397 if (common
->accept_label
== NULL
)
9398 add_jump(compiler
, &common
->accept
, CMP(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0)));
9400 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0), common
->accept_label
);
/* STR_PTR equals OVECTOR(0) here: backtrack if notempty is set. */
9401 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
9402 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, notempty
));
9403 add_jump(compiler
, &backtrack
->topbacktracks
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
/* Accept if notempty_atstart is clear. */
9404 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, notempty_atstart
));
9405 if (common
->accept_label
== NULL
)
9406 add_jump(compiler
, &common
->accept
, CMP(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0));
9408 CMPTO(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0, common
->accept_label
);
/* Accept if STR_PTR differs from the str field of jit_arguments, otherwise
backtrack. */
9409 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, str
));
9410 if (common
->accept_label
== NULL
)
9411 add_jump(compiler
, &common
->accept
, CMP(SLJIT_NOT_EQUAL
, TMP2
, 0, STR_PTR
, 0));
9413 CMPTO(SLJIT_NOT_EQUAL
, TMP2
, 0, STR_PTR
, 0, common
->accept_label
);
9414 add_jump(compiler
, &backtrack
->topbacktracks
, JUMP(SLJIT_JUMP
));
/* Emits the code for a close opcode whose bracket number is the GET2 value
at cc + 1. STR_PTR is stored into OVECTOR(offset + 1); when the bracket is
not marked in common->optimized_cbracket[], the value saved in
OVECTOR_PRIV(offset) is also copied into OVECTOR(offset). Nothing is
emitted while a recursion entry is being compiled. Returns
cc + 1 + IMM2_SIZE in every case.
NOTE(review): confirm whether offset is rescaled between the OVECTOR_PRIV()
load and the OVECTOR() stores; no such statement is visible here. */
9418 static SLJIT_INLINE pcre_uchar
*compile_close_matchingpath(compiler_common
*common
, pcre_uchar
*cc
)
9421 int offset
= GET2(cc
, 1);
9422 BOOL optimized_cbracket
= common
->optimized_cbracket
[offset
] != 0;
9424 /* Data will be discarded anyway... */
9425 if (common
->currententry
!= NULL
)
9426 return cc
+ 1 + IMM2_SIZE
;
/* The OVECTOR_PRIV() value is loaded into TMP1 before the stores below. */
9428 if (!optimized_cbracket
)
9429 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR_PRIV(offset
));
9431 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), STR_PTR
, 0);
9432 if (!optimized_cbracket
)
9433 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0);
9434 return cc
+ 1 + IMM2_SIZE
;
/* Emits the matching-path code for a control verb and pushes a
backtrack_common record for it. For OP_SKIP the current STR_PTR is pushed
on the stack. For OP_PRUNE_ARG and OP_THEN_ARG the address cc + 2 is
stored both in the common->mark_ptr slot and in the mark_ptr field of
jit_arguments.
NOTE(review): the adjustment of ccend for the *_ARG opcodes and the return
statement are not visible in this listing. */
9437 static SLJIT_INLINE pcre_uchar
*compile_control_verb_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, backtrack_common
*parent
)
9440 backtrack_common
*backtrack
;
9441 pcre_uchar opcode
= *cc
;
9442 pcre_uchar
*ccend
= cc
+ 1;
9444 if (opcode
== OP_PRUNE_ARG
|| opcode
== OP_SKIP_ARG
|| opcode
== OP_THEN_ARG
)
9447 PUSH_BACKTRACK(sizeof(backtrack_common
), cc
, NULL
);
/* OP_SKIP: save the current subject position in one stack word. */
9449 if (opcode
== OP_SKIP
)
9451 allocate_stack(common
, 1);
9452 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
/* Verbs with a name argument: publish the pointer cc + 2 as the mark. */
9456 if (opcode
== OP_PRUNE_ARG
|| opcode
== OP_THEN_ARG
)
9458 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
9459 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, (sljit_sw
)(cc
+ 2));
9460 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
, TMP2
, 0);
9461 OP1(SLJIT_MOV
, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, mark_ptr
), TMP2
, 0);
/* One-element opcode buffer holding OP_THEN_TRAP. The common.cc field of
then_trap_backtrack records is pointed at it. */
9467 static pcre_uchar then_trap_opcode
[1] = { OP_THEN_TRAP
};
/* Installs a THEN trap for the code range [cc, ccend). A
then_trap_backtrack record is pushed and becomes common->then_trap; its
common.cc points at then_trap_opcode, its start is cc - common->start and
its framesize comes from get_framesize(). The emitted code allocates
3 + max(framesize, 0) stack words. The top three hold the start offset,
the type_then_trap marker and the previous control head (loaded into TMP2
first), and the control head slot is updated to refer to the new entry.
NOTE(review): the two control_head_ptr stores below are presumably the two
arms of a framesize test - confirm. */
9469 static SLJIT_INLINE
void compile_then_trap_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
, backtrack_common
*parent
)
9472 backtrack_common
*backtrack
;
9473 BOOL needs_control_head
;
9476 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack
), cc
);
9477 common
->then_trap
= BACKTRACK_AS(then_trap_backtrack
);
9478 BACKTRACK_AS(then_trap_backtrack
)->common
.cc
= then_trap_opcode
;
9479 BACKTRACK_AS(then_trap_backtrack
)->start
= (sljit_sw
)(cc
- common
->start
);
9480 BACKTRACK_AS(then_trap_backtrack
)->framesize
= get_framesize(common
, cc
, ccend
, FALSE
, &needs_control_head
);
/* Three bookkeeping words plus the frame, if there is one. */
9482 size
= BACKTRACK_AS(then_trap_backtrack
)->framesize
;
9483 size
= 3 + (size
< 0 ? 0 : size
);
9485 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
9486 allocate_stack(common
, size
);
9488 OP2(SLJIT_ADD
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, STACK_TOP
, 0, SLJIT_IMM
, (size
- 3) * sizeof(sljit_sw
));
9490 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, STACK_TOP
, 0);
9491 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(size
- 1), SLJIT_IMM
, BACKTRACK_AS(then_trap_backtrack
)->start
);
9492 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(size
- 2), SLJIT_IMM
, type_then_trap
);
9493 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(size
- 3), TMP2
, 0);
9495 size
= BACKTRACK_AS(then_trap_backtrack
)->framesize
;
9497 init_frame(common
, cc
, ccend
, size
- 1, 0, FALSE
);
/* Emits the matching path for the opcodes in [cc, ccend). Each opcode is
handed to its dedicated compile_*_matchingpath helper (or handled inline),
which returns the position of the next opcode; on exit cc == ccend is
asserted. ccend must point at OP_END or at an opcode in the range
OP_ALT..OP_KETRPOS. Simple items report failures through
parent->top->nextbacktracks when parent->top exists, otherwise through
parent->topbacktracks.
When common->then_offsets[] is non-zero for the starting position, a THEN
trap is installed before the opcodes are compiled and a matching head
record is pushed afterwards.
NOTE(review): most case labels of the dispatch switch are not visible in
this listing - confirm which opcodes reach each statement. */
9500 static void compile_matchingpath(compiler_common
*common
, pcre_uchar
*cc
, pcre_uchar
*ccend
, backtrack_common
*parent
)
9503 backtrack_common
*backtrack
;
9504 BOOL has_then_trap
= FALSE
;
9505 then_trap_backtrack
*save_then_trap
= NULL
;
9507 SLJIT_ASSERT(*ccend
== OP_END
|| (*ccend
>= OP_ALT
&& *ccend
<= OP_KETRPOS
));
/* Remember the enclosing trap so it can be restored at the end. */
9509 if (common
->has_then
&& common
->then_offsets
[cc
- common
->start
] != 0)
9511 SLJIT_ASSERT(*ccend
!= OP_END
&& common
->control_head_ptr
!= 0);
9512 has_then_trap
= TRUE
;
9513 save_then_trap
= common
->then_trap
;
9514 /* Tail item on backtrack. */
9515 compile_then_trap_matchingpath(common
, cc
, ccend
, parent
);
9524 case OP_NOT_WORD_BOUNDARY
:
9525 case OP_WORD_BOUNDARY
:
9533 cc
= compile_simple_assertion_matchingpath(common
, *cc
, cc
+ 1, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
);
9538 case OP_NOT_WHITESPACE
:
9540 case OP_NOT_WORDCHAR
:
9555 cc
= compile_char1_matchingpath(common
, *cc
, cc
+ 1, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
, TRUE
);
/* Push the old OVECTOR(0) on the stack and replace it with STR_PTR. */
9559 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common
), cc
);
9560 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0));
9561 allocate_stack(common
, 1);
9562 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0), STR_PTR
, 0);
9563 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP2
, 0);
/* In JIT_COMPILE mode the multi-character helper is used; the single-item
helper is used otherwise. */
9569 if (common
->mode
== JIT_COMPILE
)
9570 cc
= compile_charn_matchingpath(common
, cc
, ccend
, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
);
9572 cc
= compile_char1_matchingpath(common
, *cc
, cc
+ 1, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
, TRUE
);
9606 case OP_NOTMINQUERY
:
9612 case OP_NOTPOSQUERY
:
9615 case OP_NOTMINSTARI
:
9617 case OP_NOTMINPLUSI
:
9619 case OP_NOTMINQUERYI
:
9621 case OP_NOTMINUPTOI
:
9623 case OP_NOTPOSSTARI
:
9624 case OP_NOTPOSPLUSI
:
9625 case OP_NOTPOSQUERYI
:
9626 case OP_NOTPOSUPTOI
:
9628 case OP_TYPEMINSTAR
:
9630 case OP_TYPEMINPLUS
:
9632 case OP_TYPEMINQUERY
:
9634 case OP_TYPEMINUPTO
:
9636 case OP_TYPEPOSSTAR
:
9637 case OP_TYPEPOSPLUS
:
9638 case OP_TYPEPOSQUERY
:
9639 case OP_TYPEPOSUPTO
:
9640 cc
= compile_iterator_matchingpath(common
, cc
, parent
);
/* A class followed by a repeat opcode (OP_CRSTAR..OP_CRPOSRANGE) is
compiled as an iterator, otherwise as a single item. */
9645 if (cc
[1 + (32 / sizeof(pcre_uchar
))] >= OP_CRSTAR
&& cc
[1 + (32 / sizeof(pcre_uchar
))] <= OP_CRPOSRANGE
)
9646 cc
= compile_iterator_matchingpath(common
, cc
, parent
);
9648 cc
= compile_char1_matchingpath(common
, *cc
, cc
+ 1, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
, TRUE
);
9651 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
9653 if (*(cc
+ GET(cc
, 1)) >= OP_CRSTAR
&& *(cc
+ GET(cc
, 1)) <= OP_CRPOSRANGE
)
9654 cc
= compile_iterator_matchingpath(common
, cc
, parent
);
9656 cc
= compile_char1_matchingpath(common
, *cc
, cc
+ 1, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
, TRUE
);
/* References: a following repeat opcode selects the iterator helper. */
9662 if (cc
[1 + IMM2_SIZE
] >= OP_CRSTAR
&& cc
[1 + IMM2_SIZE
] <= OP_CRPOSRANGE
)
9663 cc
= compile_ref_iterator_matchingpath(common
, cc
, parent
);
9666 compile_ref_matchingpath(common
, cc
, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
, TRUE
, FALSE
);
9667 cc
+= 1 + IMM2_SIZE
;
9673 if (cc
[1 + 2 * IMM2_SIZE
] >= OP_CRSTAR
&& cc
[1 + 2 * IMM2_SIZE
] <= OP_CRPOSRANGE
)
9674 cc
= compile_ref_iterator_matchingpath(common
, cc
, parent
);
9677 compile_dnref_search(common
, cc
, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
);
9678 compile_ref_matchingpath(common
, cc
, parent
->top
!= NULL
? &parent
->top
->nextbacktracks
: &parent
->topbacktracks
, TRUE
, FALSE
);
9679 cc
+= 1 + 2 * IMM2_SIZE
;
9684 cc
= compile_recurse_matchingpath(common
, cc
, parent
);
9688 cc
= compile_callout_matchingpath(common
, cc
, parent
);
9694 case OP_ASSERTBACK_NOT
:
9695 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack
), cc
);
9696 cc
= compile_assert_matchingpath(common
, cc
, BACKTRACK_AS(assert_backtrack
), FALSE
);
/* The bracket body is skipped with bracketend(); one stack word (STR_PTR)
is pushed unless the bracket ends with OP_KETRMIN, in which case a zero
word and STR_PTR are pushed. */
9700 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack
), cc
);
9701 cc
= bracketend(cc
+ 1);
9702 if (*(cc
- 1 - LINK_SIZE
) != OP_KETRMIN
)
9704 allocate_stack(common
, 1);
9705 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
9709 allocate_stack(common
, 2);
9710 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
9711 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), STR_PTR
, 0);
9713 BACKTRACK_AS(braminzero_backtrack
)->matchingpath
= LABEL();
9714 count_match(common
);
9725 cc
= compile_bracket_matchingpath(common
, cc
, parent
);
9729 if (cc
[1] > OP_ASSERTBACK_NOT
)
9730 cc
= compile_bracket_matchingpath(common
, cc
, parent
);
9733 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack
), cc
);
9734 cc
= compile_assert_matchingpath(common
, cc
, BACKTRACK_AS(assert_backtrack
), FALSE
);
9743 cc
= compile_bracketpos_matchingpath(common
, cc
, parent
);
/* Mark handling: the previous mark is saved on the stack and cc + 2 becomes
the new mark, both in the mark_ptr slot and in jit_arguments. With
has_skip_arg a five-word entry is pushed and linked into the control head
chain. */
9747 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common
), cc
);
9748 SLJIT_ASSERT(common
->mark_ptr
!= 0);
9749 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
);
9750 allocate_stack(common
, common
->has_skip_arg
? 5 : 1);
9751 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
9752 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(common
->has_skip_arg
? 4 : 0), TMP2
, 0);
9753 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, (sljit_sw
)(cc
+ 2));
9754 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
, TMP2
, 0);
9755 OP1(SLJIT_MOV
, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, mark_ptr
), TMP2
, 0);
9756 if (common
->has_skip_arg
)
9758 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
9759 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, STACK_TOP
, 0);
9760 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(1), SLJIT_IMM
, type_mark
);
9761 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(2), SLJIT_IMM
, (sljit_sw
)(cc
+ 2));
9762 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(3), STR_PTR
, 0);
9763 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), TMP1
, 0);
9765 cc
+= 1 + 2 + cc
[1];
9775 cc
= compile_control_verb_matchingpath(common
, cc
, parent
);
9780 case OP_ASSERT_ACCEPT
:
9781 cc
= compile_fail_accept_matchingpath(common
, cc
, parent
);
9785 cc
= compile_close_matchingpath(common
, cc
);
9789 cc
= bracketend(cc
+ 1);
9793 SLJIT_UNREACHABLE();
9802 /* Head item on backtrack. */
9803 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack
), cc
);
9804 BACKTRACK_AS(then_trap_backtrack
)->common
.cc
= then_trap_opcode
;
9805 BACKTRACK_AS(then_trap_backtrack
)->then_trap
= common
->then_trap
;
9806 common
->then_trap
= save_then_trap
;
9808 SLJIT_ASSERT(cc
== ccend
);
9811 #undef PUSH_BACKTRACK
9812 #undef PUSH_BACKTRACK_NOVALUE
/* COMPILE_BACKTRACKINGPATH(current): invokes compile_backtrackingpath() on
   `current` and then tests sljit_get_compiler_error(compiler); `common` and
   `compiler` must be in scope where the macro is used.
   NOTE(review): the action taken when an error is detected is not visible in
   this excerpt - confirm against the full macro body.
   CURRENT_AS(type): views the local `current` pointer as `type *`. */
9815 #define COMPILE_BACKTRACKINGPATH(current) \
9818 compile_backtrackingpath(common, (current)); \
9819 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9824 #define CURRENT_AS(type) ((type *)current)
/* Emits the backtracking path of a single-character iterator whose backtrack
   record is `current` (accessed as char_iterator_backtrack).

   The iterator is decoded with get_iterator_parameters(). Its two saved values
   are addressed through `base`/`offset0`/`offset1`: stack slots 0 and 1 when
   PRIVATE_DATA(cc) is zero, otherwise two consecutive sljit_sw-sized words
   addressed relative to SLJIT_SP.

   NOTE(review): the dispatch that selects between the code sections below
   (and the declarations of opcode, type, end and compiler) is not visible in
   this excerpt - verify against the full source. */
9826 static void compile_iterator_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
9829 pcre_uchar
*cc
= current
->cc
;
9832 sljit_u32 max
= 0, exact
;
9833 struct sljit_label
*label
= NULL
;
9834 struct sljit_jump
*jump
= NULL
;
9835 jump_list
*jumplist
= NULL
;
9837 int private_data_ptr
= PRIVATE_DATA(cc
);
/* Without private data the saved values live on the stack (slots 0 and 1). */
9838 int base
= (private_data_ptr
== 0) ? SLJIT_MEM1(STACK_TOP
) : SLJIT_MEM1(SLJIT_SP
);
9839 int offset0
= (private_data_ptr
== 0) ? STACK(0) : private_data_ptr
;
9840 int offset1
= (private_data_ptr
== 0) ? STACK(1) : private_data_ptr
+ (int)sizeof(sljit_sw
);
9842 cc
= get_iterator_parameters(common
, cc
, &opcode
, &type
, &max
, &exact
, &end
);
/* OP_ANYNL / OP_EXTUNI: pop the saved STR_PTR; a non-zero value continues at
   the matching path. */
9848 if (type
== OP_ANYNL
|| type
== OP_EXTUNI
)
9850 SLJIT_ASSERT(private_data_ptr
== 0);
9851 set_jumps(CURRENT_AS(char_iterator_backtrack
)->u
.backtracks
, LABEL());
9852 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
9853 free_stack(common
, 1);
9854 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
/* charpos variant: step STR_PTR back while it stays above the limit read from
   offset1, comparing the character just before STR_PTR (optionally OR-ed with
   othercasebit) against the recorded chr; a hit continues at the matching
   path. */
9858 if (CURRENT_AS(char_iterator_backtrack
)->u
.charpos
.enabled
)
9860 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9861 OP1(SLJIT_MOV
, TMP2
, 0, base
, offset1
);
9862 OP2(SLJIT_SUB
, STR_PTR
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(1));
9864 jump
= CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, TMP2
, 0);
9866 OP1(MOV_UCHAR
, TMP1
, 0, SLJIT_MEM1(STR_PTR
), IN_UCHARS(-1));
9867 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9868 if (CURRENT_AS(char_iterator_backtrack
)->u
.charpos
.othercasebit
!= 0)
9869 OP2(SLJIT_OR
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, CURRENT_AS(char_iterator_backtrack
)->u
.charpos
.othercasebit
);
9870 CMPTO(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, CURRENT_AS(char_iterator_backtrack
)->u
.charpos
.chr
, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
9871 skip_char_back(common
);
9872 CMPTO(SLJIT_GREATER
, STR_PTR
, 0, TMP2
, 0, label
);
/* Generic variant: if STR_PTR is above the limit stored in offset1, move back
   one character, save the new position and retry the matching path. */
9876 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9877 jump
= CMP(SLJIT_LESS_EQUAL
, STR_PTR
, 0, base
, offset1
);
9878 skip_char_back(common
);
9879 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9880 JUMPTO(SLJIT_JUMP
, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
/* Stack-resident state (two slots) is released. */
9883 if (private_data_ptr
== 0)
9884 free_stack(common
, 2);
/* Attempt one more character from the saved position: on success the new
   position is stored and the matching path is retried; jumps collected in
   jumplist are bound to the label that follows. */
9889 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9890 compile_char1_matchingpath(common
, type
, cc
, &jumplist
, TRUE
);
9891 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9892 JUMPTO(SLJIT_JUMP
, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
9893 set_jumps(jumplist
, LABEL());
9894 if (private_data_ptr
== 0)
9895 free_stack(common
, 1);
/* Counted variant: the counter held in offset1 is decremented first; when it
   reaches zero a jump is added to jumplist instead of trying another
   character. */
9899 OP1(SLJIT_MOV
, TMP1
, 0, base
, offset1
);
9900 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9901 OP2(SLJIT_SUB
| SLJIT_SET_Z
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
9902 add_jump(compiler
, &jumplist
, JUMP(SLJIT_ZERO
));
9904 OP1(SLJIT_MOV
, base
, offset1
, TMP1
, 0);
9905 compile_char1_matchingpath(common
, type
, cc
, &jumplist
, TRUE
);
9906 OP1(SLJIT_MOV
, base
, offset0
, STR_PTR
, 0);
9907 JUMPTO(SLJIT_JUMP
, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
9909 set_jumps(jumplist
, LABEL());
9910 if (private_data_ptr
== 0)
9911 free_stack(common
, 2);
/* Reload STR_PTR from offset0 and clear the slot; a non-zero value continues
   at the matching path. The recorded backtracks land on a second copy of the
   reload that always jumps to the matching path. */
9915 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9916 OP1(SLJIT_MOV
, base
, offset0
, SLJIT_IMM
, 0);
9917 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
9918 jump
= JUMP(SLJIT_JUMP
);
9919 set_jumps(CURRENT_AS(char_iterator_backtrack
)->u
.backtracks
, LABEL());
9920 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9921 OP1(SLJIT_MOV
, base
, offset0
, SLJIT_IMM
, 0);
9922 JUMPTO(SLJIT_JUMP
, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
9924 if (private_data_ptr
== 0)
9925 free_stack(common
, 1);
/* Reload STR_PTR and clear the slot; a zero value skips the single-character
   attempt that follows. */
9929 OP1(SLJIT_MOV
, STR_PTR
, 0, base
, offset0
);
9930 OP1(SLJIT_MOV
, base
, offset0
, SLJIT_IMM
, 0);
9931 jump
= CMP(SLJIT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0);
9932 compile_char1_matchingpath(common
, type
, cc
, &jumplist
, TRUE
);
9933 JUMPTO(SLJIT_JUMP
, CURRENT_AS(char_iterator_backtrack
)->matchingpath
);
9934 set_jumps(jumplist
, LABEL());
9936 if (private_data_ptr
== 0)
9937 free_stack(common
, 1);
9947 SLJIT_UNREACHABLE();
/* Jumps recorded in current->topbacktracks are bound to the end of the
   generated code. */
9951 set_jumps(current
->topbacktracks
, LABEL());
/* Emits the backtracking path of a repeated back reference.

   `ref` is TRUE for OP_REF / OP_REFI. The repeat opcode (`type`) is read from
   the unit that follows the reference operand(s): index 1 + IMM2_SIZE when
   `ref` is TRUE, 1 + 2 * IMM2_SIZE otherwise. An even `type` selects the
   maximizing code. */
9954 static SLJIT_INLINE
void compile_ref_iterator_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
9957 pcre_uchar
*cc
= current
->cc
;
9958 BOOL ref
= (*cc
== OP_REF
|| *cc
== OP_REFI
);
9961 type
= cc
[ref
? 1 + IMM2_SIZE
: 1 + 2 * IMM2_SIZE
];
9963 if ((type
& 0x1) == 0)
9965 /* Maximize case. */
9966 set_jumps(current
->topbacktracks
, LABEL());
9967 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
9968 free_stack(common
, 1);
9969 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0, CURRENT_AS(ref_iterator_backtrack
)->matchingpath
);
/* Presumably the odd-`type` case (the branch separator is not visible here):
   STR_PTR is read without popping; a non-zero value continues at the matching
   path, otherwise 2 (ref) or 3 stack slots are released. */
9973 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
9974 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0, CURRENT_AS(ref_iterator_backtrack
)->matchingpath
);
9975 set_jumps(current
->topbacktracks
, LABEL());
9976 free_stack(common
, ref
? 2 : 3);
/* Emits the backtracking path of a recursion item (recurse_backtrack).

   For an inlined pattern the nested backtrack list (current->top) is compiled
   first. The pending topbacktracks jumps are then bound, and the values saved
   on the stack are written back to OVECTOR(0) and/or common->mark_ptr,
   depending on has_set_som and mark_ptr.

   NOTE(review): the statement controlled by the second inlined_pattern test
   is not visible in this excerpt - confirm whether the restore code is
   emitted for inlined patterns. */
9979 static SLJIT_INLINE
void compile_recurse_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
9983 if (CURRENT_AS(recurse_backtrack
)->inlined_pattern
)
9984 compile_backtrackingpath(common
, current
->top
);
9985 set_jumps(current
->topbacktracks
, LABEL());
9986 if (CURRENT_AS(recurse_backtrack
)->inlined_pattern
)
/* Two saved values: slot 0 goes to OVECTOR(0), slot 1 to mark_ptr. */
9989 if (common
->has_set_som
&& common
->mark_ptr
!= 0)
9991 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
9992 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
9993 free_stack(common
, 2);
9994 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0), TMP2
, 0);
9995 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
, TMP1
, 0);
/* One saved value: its destination is OVECTOR(0) when has_set_som is set,
   mark_ptr otherwise. */
9997 else if (common
->has_set_som
|| common
->mark_ptr
!= 0)
9999 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10000 free_stack(common
, 1);
10001 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->has_set_som
? (int)(OVECTOR(0)) : common
->mark_ptr
, TMP2
, 0);
/* Emits the backtracking path of an assertion (assert_backtrack record),
   optionally preceded by OP_BRAZERO. OP_BRAMINZERO is not expected here
   (asserted below).

   NOTE(review): the assignments that update `bra` and `cc` for the OP_BRAZERO
   prefix, and several branch separators, are not visible in this excerpt. */
10005 static void compile_assert_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
10008 pcre_uchar
*cc
= current
->cc
;
10009 pcre_uchar bra
= OP_BRA
;
10010 struct sljit_jump
*brajump
= NULL
;
10012 SLJIT_ASSERT(*cc
!= OP_BRAMINZERO
);
10013 if (*cc
== OP_BRAZERO
)
/* With OP_BRAZERO the saved STR_PTR is read back from the top stack slot. */
10019 if (bra
== OP_BRAZERO
)
10021 SLJIT_ASSERT(current
->topbacktracks
== NULL
);
10022 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
/* Negative framesize: the pending jumps are bound here; with OP_BRAZERO the
   top slot is cleared and a non-zero STR_PTR continues at the matching path,
   otherwise the slot is released. */
10025 if (CURRENT_AS(assert_backtrack
)->framesize
< 0)
10027 set_jumps(current
->topbacktracks
, LABEL());
10029 if (bra
== OP_BRAZERO
)
10031 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
10032 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0, CURRENT_AS(assert_backtrack
)->matchingpath
);
10033 free_stack(common
, 1);
10038 if (bra
== OP_BRAZERO
)
10040 if (*cc
== OP_ASSERT_NOT
|| *cc
== OP_ASSERTBACK_NOT
)
10042 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
10043 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0, CURRENT_AS(assert_backtrack
)->matchingpath
);
10044 free_stack(common
, 1);
10047 free_stack(common
, 1);
10048 brajump
= CMP(SLJIT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0);
/* Positive assertions: reload STACK_TOP from the assertion's private data
   slot, add a fast call to common->revertframes, then write back the private
   data value stored at STACK(-framesize - 1). */
10051 if (*cc
== OP_ASSERT
|| *cc
== OP_ASSERTBACK
)
10053 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), CURRENT_AS(assert_backtrack
)->private_data_ptr
);
10054 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
10055 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), CURRENT_AS(assert_backtrack
)->private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-CURRENT_AS(assert_backtrack
)->framesize
- 1));
10057 set_jumps(current
->topbacktracks
, LABEL());
10060 set_jumps(current
->topbacktracks
, LABEL());
/* OP_BRAZERO: re-reserve one slot by adjusting STACK_TOP directly, store zero
   in it and continue at the matching path. */
10062 if (bra
== OP_BRAZERO
)
10064 /* We know there is enough place on the stack. */
10065 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, sizeof(sljit_sw
));
10066 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), SLJIT_IMM
, 0);
10067 JUMPTO(SLJIT_JUMP
, CURRENT_AS(assert_backtrack
)->matchingpath
);
/* Emits the backtracking path of a bracket (group) described by a
   bracket_backtrack record.

   The visible code deals with, in order: decoding of bounded-repeat data
   stored in PRIVATE_DATA, restoring the repeat counter, the OP_KETRMAX /
   OP_KETRMIN / OP_BRAZERO entry checks, restoring capture values, unwinding
   OP_ONCE frames, selecting the alternative that was taken (compare chain or
   table jump), compiling the backtracking code of each alternative followed
   by the matching path of the next one, and the final stack clean-up and
   repeat handling.

   NOTE(review): several declarations (e.g. ket, offset, compiler) and many
   control-flow lines are not visible in this excerpt; the comments below only
   describe the statements that are shown. */
10072 static void compile_bracket_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
10075 int opcode
, stacksize
, alt_count
, alt_max
;
10077 int private_data_ptr
= CURRENT_AS(bracket_backtrack
)->private_data_ptr
;
10078 int repeat_ptr
= 0, repeat_type
= 0, repeat_count
= 0;
10079 pcre_uchar
*cc
= current
->cc
;
10080 pcre_uchar
*ccbegin
;
10081 pcre_uchar
*ccprev
;
10082 pcre_uchar bra
= OP_BRA
;
10084 assert_backtrack
*assert;
10085 sljit_uw
*next_update_addr
= NULL
;
10086 BOOL has_alternatives
;
10087 BOOL needs_control_head
= FALSE
;
10088 struct sljit_jump
*brazero
= NULL
;
10089 struct sljit_jump
*alt1
= NULL
;
10090 struct sljit_jump
*alt2
= NULL
;
10091 struct sljit_jump
*once
= NULL
;
10092 struct sljit_jump
*cond
= NULL
;
10093 struct sljit_label
*rmin_label
= NULL
;
10094 struct sljit_label
*exact_label
= NULL
;
10096 if (*cc
== OP_BRAZERO
|| *cc
== OP_BRAMINZERO
)
/* Address the unit 1 + LINK_SIZE before the end of the bracket (presumably
   the closing KET opcode - TODO confirm). */
10103 ccbegin
= bracketend(cc
) - 1 - LINK_SIZE
;
/* A plain OP_KET with private data carries a bounded repeat: fetch the
   counter slot, the repeat type and the repeat count. */
10105 if (ket
== OP_KET
&& PRIVATE_DATA(ccbegin
) != 0)
10107 repeat_ptr
= PRIVATE_DATA(ccbegin
);
10108 repeat_type
= PRIVATE_DATA(ccbegin
+ 2);
10109 repeat_count
= PRIVATE_DATA(ccbegin
+ 3);
10110 SLJIT_ASSERT(repeat_type
!= 0 && repeat_count
!= 0);
10111 if (repeat_type
== OP_UPTO
)
10113 if (repeat_type
== OP_MINUPTO
)
10118 has_alternatives
= *cc
== OP_ALT
;
/* For conditional groups has_alternatives is derived from the condition: an
   assertion opcode in the condition position, or a non-NULL condfailed
   list. */
10119 if (SLJIT_UNLIKELY(opcode
== OP_COND
) || SLJIT_UNLIKELY(opcode
== OP_SCOND
))
10120 has_alternatives
= (ccbegin
[1 + LINK_SIZE
] >= OP_ASSERT
&& ccbegin
[1 + LINK_SIZE
] <= OP_ASSERTBACK_NOT
) || CURRENT_AS(bracket_backtrack
)->u
.condfailed
!= NULL
;
/* Capturing brackets: offset is twice the 2-unit value read after the
   link. */
10121 if (opcode
== OP_CBRA
|| opcode
== OP_SCBRA
)
10122 offset
= (GET2(ccbegin
, 1 + LINK_SIZE
)) << 1;
10123 if (SLJIT_UNLIKELY(opcode
== OP_COND
) && (*cc
== OP_KETRMAX
|| *cc
== OP_KETRMIN
))
10125 if (SLJIT_UNLIKELY(opcode
== OP_ONCE_NC
))
10128 alt_max
= has_alternatives
? no_alternatives(ccbegin
) : 0;
10130 /* Decoding the needs_control_head in framesize. */
10131 if (opcode
== OP_ONCE
)
10133 needs_control_head
= (CURRENT_AS(bracket_backtrack
)->u
.framesize
& 0x1) != 0;
10134 CURRENT_AS(bracket_backtrack
)->u
.framesize
>>= 1;
/* Repeated bracket with a counter: pop the saved counter value and write it
   to repeat_ptr (plus one for OP_UPTO). */
10137 if (ket
!= OP_KET
&& repeat_type
!= 0)
10139 /* TMP1 is used in OP_KETRMIN below. */
10140 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10141 free_stack(common
, 1);
10142 if (repeat_type
== OP_UPTO
)
10143 OP2(SLJIT_ADD
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, TMP1
, 0, SLJIT_IMM
, 1);
10145 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, TMP1
, 0);
10148 if (ket
== OP_KETRMAX
)
10150 if (bra
== OP_BRAZERO
)
10152 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10153 free_stack(common
, 1);
10154 brazero
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0);
10157 else if (ket
== OP_KETRMIN
)
10159 if (bra
!= OP_BRAMINZERO
)
10161 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10162 if (repeat_type
!= 0)
10164 /* TMP1 was set a few lines above. */
10165 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0, CURRENT_AS(bracket_backtrack
)->recursive_matchingpath
);
10166 /* Drop STR_PTR for non-greedy plus quantifier. */
10167 if (opcode
!= OP_ONCE
)
10168 free_stack(common
, 1);
10170 else if (opcode
>= OP_SBRA
|| opcode
== OP_ONCE
)
10172 /* Checking zero-length iteration. */
10173 if (opcode
!= OP_ONCE
|| CURRENT_AS(bracket_backtrack
)->u
.framesize
< 0)
10174 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, CURRENT_AS(bracket_backtrack
)->recursive_matchingpath
);
10177 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
10178 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(TMP1
), STACK(-CURRENT_AS(bracket_backtrack
)->u
.framesize
- 2), CURRENT_AS(bracket_backtrack
)->recursive_matchingpath
);
10180 /* Drop STR_PTR for non-greedy plus quantifier. */
10181 if (opcode
!= OP_ONCE
)
10182 free_stack(common
, 1);
10185 JUMPTO(SLJIT_JUMP
, CURRENT_AS(bracket_backtrack
)->recursive_matchingpath
);
10187 rmin_label
= LABEL();
10188 if (repeat_type
!= 0)
10189 OP2(SLJIT_ADD
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_IMM
, 1);
10191 else if (bra
== OP_BRAZERO
)
10193 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10194 free_stack(common
, 1);
10195 brazero
= CMP(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0);
10197 else if (repeat_type
== OP_EXACT
)
10199 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_IMM
, 1);
10200 exact_label
= LABEL();
/* Restore the capture values saved on the stack: three slots when
   capture_last_ptr is tracked (capture_last_ptr plus the OVECTOR pair), two
   slots otherwise and only for brackets not marked in optimized_cbracket. */
10205 if (common
->capture_last_ptr
!= 0)
10207 SLJIT_ASSERT(common
->optimized_cbracket
[offset
>> 1] == 0);
10208 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10209 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
10210 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
, TMP1
, 0);
10211 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(2));
10212 free_stack(common
, 3);
10213 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP2
, 0);
10214 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), TMP1
, 0);
10216 else if (common
->optimized_cbracket
[offset
>> 1] == 0)
10218 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10219 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
10220 free_stack(common
, 2);
10221 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0);
10222 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), TMP2
, 0);
/* OP_ONCE with a saved frame: reload STACK_TOP from private data and add a
   fast call to common->revertframes. */
10226 if (SLJIT_UNLIKELY(opcode
== OP_ONCE
))
10228 if (CURRENT_AS(bracket_backtrack
)->u
.framesize
>= 0)
10230 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
10231 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
10233 once
= JUMP(SLJIT_JUMP
);
10235 else if (SLJIT_UNLIKELY(opcode
== OP_COND
) || SLJIT_UNLIKELY(opcode
== OP_SCOND
))
10237 if (has_alternatives
)
10239 /* Always exactly one alternative. */
10240 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10241 free_stack(common
, 1);
10244 alt1
= CMP(SLJIT_EQUAL
, TMP1
, 0, SLJIT_IMM
, sizeof(sljit_uw
));
/* Other brackets with alternatives: pop the value identifying the alternative
   that matched (a multiple of sizeof(sljit_uw)) and dispatch on it. */
10247 else if (has_alternatives
)
10249 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10250 free_stack(common
, 1);
10254 /* Table jump if alt_max is greater than 4. */
10255 next_update_addr
= allocate_read_only_data(common
, alt_max
* sizeof(sljit_uw
));
10256 if (SLJIT_UNLIKELY(next_update_addr
== NULL
))
10258 sljit_emit_ijump(compiler
, SLJIT_JUMP
, SLJIT_MEM1(TMP1
), (sljit_sw
)next_update_addr
);
10259 add_label_addr(common
, next_update_addr
++);
10264 alt2
= CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, 2 * sizeof(sljit_uw
));
10265 alt1
= CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, sizeof(sljit_uw
));
/* Backtracking code of the items nested in this bracket (current->top). */
10269 COMPILE_BACKTRACKINGPATH(current
->top
);
10270 if (current
->topbacktracks
)
10271 set_jumps(current
->topbacktracks
, LABEL());
10273 if (SLJIT_UNLIKELY(opcode
== OP_COND
) || SLJIT_UNLIKELY(opcode
== OP_SCOND
))
10275 /* Conditional block always has at most one alternative. */
10276 if (ccbegin
[1 + LINK_SIZE
] >= OP_ASSERT
&& ccbegin
[1 + LINK_SIZE
] <= OP_ASSERTBACK_NOT
)
10278 SLJIT_ASSERT(has_alternatives
);
10279 assert = CURRENT_AS(bracket_backtrack
)->u
.assert;
10280 if (assert->framesize
>= 0 && (ccbegin
[1 + LINK_SIZE
] == OP_ASSERT
|| ccbegin
[1 + LINK_SIZE
] == OP_ASSERTBACK
))
10282 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), assert->private_data_ptr
);
10283 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
10284 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), assert->private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-assert->framesize
- 1));
10286 cond
= JUMP(SLJIT_JUMP
);
10287 set_jumps(CURRENT_AS(bracket_backtrack
)->u
.assert->condfailed
, LABEL());
10289 else if (CURRENT_AS(bracket_backtrack
)->u
.condfailed
!= NULL
)
10291 SLJIT_ASSERT(has_alternatives
);
10292 cond
= JUMP(SLJIT_JUMP
);
10293 set_jumps(CURRENT_AS(bracket_backtrack
)->u
.condfailed
, LABEL());
10296 SLJIT_ASSERT(!has_alternatives
);
10299 if (has_alternatives
)
10301 alt_count
= sizeof(sljit_uw
);
/* The nested lists are reset before the matching path of the next
   alternative is compiled into `current`. */
10304 current
->top
= NULL
;
10305 current
->topbacktracks
= NULL
;
10306 current
->nextbacktracks
= NULL
;
10307 /* Conditional blocks always have an additional alternative, even if it is empty. */
10310 ccprev
= cc
+ 1 + LINK_SIZE
;
/* Reload STR_PTR for the next alternative: from private data when available,
   otherwise from the stack (slot 1 for OP_ONCE with a control head). */
10312 if (opcode
!= OP_COND
&& opcode
!= OP_SCOND
)
10314 if (opcode
!= OP_ONCE
)
10316 if (private_data_ptr
!= 0)
10317 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
);
10319 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10322 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(needs_control_head
? 1 : 0));
10324 compile_matchingpath(common
, ccprev
, cc
, current
);
10325 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
10329 /* Instructions after the current alternative is successfully matched. */
10330 /* There is a similar code in compile_bracket_matchingpath. */
10331 if (opcode
== OP_ONCE
)
10332 match_once_common(common
, ket
, CURRENT_AS(bracket_backtrack
)->u
.framesize
, private_data_ptr
, has_alternatives
, needs_control_head
);
10335 if (repeat_type
== OP_MINUPTO
)
10337 /* We need to preserve the counter. TMP2 will be used below. */
10338 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
);
10341 if (ket
!= OP_KET
|| bra
!= OP_BRA
)
10345 if (common
->capture_last_ptr
!= 0)
10347 if (common
->optimized_cbracket
[offset
>> 1] == 0)
10350 if (opcode
!= OP_ONCE
)
10354 allocate_stack(common
, stacksize
);
10357 if (repeat_type
== OP_MINUPTO
)
10359 /* TMP2 was set above. */
10360 OP2(SLJIT_SUB
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), TMP2
, 0, SLJIT_IMM
, 1);
10364 if (ket
!= OP_KET
|| bra
!= OP_BRA
)
10367 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), STR_PTR
, 0);
10369 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), SLJIT_IMM
, 0);
10374 stacksize
= match_capture_common(common
, stacksize
, offset
, private_data_ptr
);
/* Record which alternative matched so the backtracking dispatch above can
   select it later. */
10376 if (opcode
!= OP_ONCE
)
10377 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(stacksize
), SLJIT_IMM
, alt_count
);
10379 if (offset
!= 0 && ket
== OP_KETRMAX
&& common
->optimized_cbracket
[offset
>> 1] != 0)
10381 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
10382 SLJIT_ASSERT(private_data_ptr
== OVECTOR(offset
+ 0));
10383 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), STR_PTR
, 0);
10386 JUMPTO(SLJIT_JUMP
, CURRENT_AS(bracket_backtrack
)->alternative_matchingpath
);
10388 if (opcode
!= OP_ONCE
)
10391 add_label_addr(common
, next_update_addr
++);
10394 if (alt_count
!= 2 * sizeof(sljit_uw
))
10397 if (alt_max
== 3 && alt_count
== sizeof(sljit_uw
))
10398 alt2
= CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, 2 * sizeof(sljit_uw
));
10404 alt1
= CMP(SLJIT_GREATER_EQUAL
, TMP1
, 0, SLJIT_IMM
, 3 * sizeof(sljit_uw
));
10407 alt_count
+= sizeof(sljit_uw
);
10410 COMPILE_BACKTRACKINGPATH(current
->top
);
10411 if (current
->topbacktracks
)
10412 set_jumps(current
->topbacktracks
, LABEL());
10413 SLJIT_ASSERT(!current
->nextbacktracks
);
10415 while (*cc
== OP_ALT
);
10419 SLJIT_ASSERT(opcode
== OP_COND
|| opcode
== OP_SCOND
);
10420 assert = CURRENT_AS(bracket_backtrack
)->u
.assert;
/* Negative assertion condition with a saved frame: unwind it the same way as
   the positive case handled earlier. */
10421 if ((ccbegin
[1 + LINK_SIZE
] == OP_ASSERT_NOT
|| ccbegin
[1 + LINK_SIZE
] == OP_ASSERTBACK_NOT
) && assert->framesize
>= 0)
10423 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), assert->private_data_ptr
);
10424 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
10425 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), assert->private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-assert->framesize
- 1));
10430 /* Free the STR_PTR. */
10431 if (private_data_ptr
== 0)
10432 free_stack(common
, 1);
10437 /* Using both tmp register is better for instruction scheduling. */
10438 if (common
->optimized_cbracket
[offset
>> 1] != 0)
10440 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10441 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
10442 free_stack(common
, 2);
10443 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0);
10444 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), TMP2
, 0);
10448 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10449 free_stack(common
, 1);
10450 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP1
, 0);
10453 else if (opcode
== OP_SBRA
|| opcode
== OP_SCOND
)
10455 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(0));
10456 free_stack(common
, 1);
/* OP_ONCE clean-up: compute how many stack slots to release, then restore the
   previous private data value. */
10458 else if (opcode
== OP_ONCE
)
10460 cc
= ccbegin
+ GET(ccbegin
, 1);
10461 stacksize
= needs_control_head
? 1 : 0;
10463 if (CURRENT_AS(bracket_backtrack
)->u
.framesize
>= 0)
10465 /* Reset head and drop saved frame. */
10466 stacksize
+= CURRENT_AS(bracket_backtrack
)->u
.framesize
+ ((ket
!= OP_KET
|| *cc
== OP_ALT
) ? 2 : 1);
10468 else if (ket
== OP_KETRMAX
|| (*cc
== OP_ALT
&& ket
!= OP_KETRMIN
))
10470 /* The STR_PTR must be released. */
10475 free_stack(common
, stacksize
);
10478 /* Restore previous private_data_ptr */
10479 if (CURRENT_AS(bracket_backtrack
)->u
.framesize
>= 0)
10480 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-CURRENT_AS(bracket_backtrack
)->u
.framesize
- 1));
10481 else if (ket
== OP_KETRMIN
)
10483 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
10484 /* See the comment below. */
10485 free_stack(common
, 2);
10486 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), private_data_ptr
, TMP1
, 0);
/* Exact repeat: increment the counter and loop back to exact_label while it
   does not exceed repeat_count. */
10490 if (repeat_type
== OP_EXACT
)
10492 OP2(SLJIT_ADD
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, SLJIT_IMM
, 1);
10493 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), repeat_ptr
, TMP1
, 0);
10494 CMPTO(SLJIT_LESS_EQUAL
, TMP1
, 0, SLJIT_IMM
, repeat_count
, exact_label
);
/* OP_KETRMAX: a non-zero saved STR_PTR re-enters the bracket through
   recursive_matchingpath; with OP_BRAZERO the zero-iteration path is tried
   afterwards using the STR_PTR kept in slot 1. */
10496 else if (ket
== OP_KETRMAX
)
10498 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10499 if (bra
!= OP_BRAZERO
)
10500 free_stack(common
, 1);
10502 CMPTO(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0, CURRENT_AS(bracket_backtrack
)->recursive_matchingpath
);
10503 if (bra
== OP_BRAZERO
)
10505 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
10506 JUMPTO(SLJIT_JUMP
, CURRENT_AS(bracket_backtrack
)->zero_matchingpath
);
10508 free_stack(common
, 1);
10511 else if (ket
== OP_KETRMIN
)
10513 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10515 /* OP_ONCE removes everything in case of a backtrack, so we don't
10516 need to explicitly release the STR_PTR. The extra release would
10517 affect badly the free_stack(2) above. */
10518 if (opcode
!= OP_ONCE
)
10519 free_stack(common
, 1);
10520 CMPTO(SLJIT_NOT_EQUAL
, TMP1
, 0, SLJIT_IMM
, 0, rmin_label
);
10521 if (opcode
== OP_ONCE
)
10522 free_stack(common
, bra
== OP_BRAMINZERO
? 2 : 1);
10523 else if (bra
== OP_BRAMINZERO
)
10524 free_stack(common
, 1);
10526 else if (bra
== OP_BRAZERO
)
10528 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10529 JUMPTO(SLJIT_JUMP
, CURRENT_AS(bracket_backtrack
)->zero_matchingpath
);
/* Emits the backtracking path of a bracketpos_backtrack record.

   With a negative framesize, the capture pair of OP_CBRAPOS / OP_SCBRAPOS
   (and capture_last_ptr when it is in use) is reloaded from the stack, the
   pending jumps are bound and the record's stacksize slots are freed.
   The remaining code reloads STACK_TOP from private data, adds a fast call to
   common->revertframes and finally restores the previous private data value.

   NOTE(review): the separation between the two cases is not visible in this
   excerpt - presumably the first case returns early; confirm. */
10534 static SLJIT_INLINE
void compile_bracketpos_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
10538 struct sljit_jump
*jump
;
10540 if (CURRENT_AS(bracketpos_backtrack
)->framesize
< 0)
10542 if (*current
->cc
== OP_CBRAPOS
|| *current
->cc
== OP_SCBRAPOS
)
/* Slots 0 and 1 hold the OVECTOR pair, slot 2 the capture_last_ptr value. */
10544 offset
= (GET2(current
->cc
, 1 + LINK_SIZE
)) << 1;
10545 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10546 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(1));
10547 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
), TMP1
, 0);
10548 if (common
->capture_last_ptr
!= 0)
10549 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(2));
10550 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(offset
+ 1), TMP2
, 0);
10551 if (common
->capture_last_ptr
!= 0)
10552 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
, TMP1
, 0);
10554 set_jumps(current
->topbacktracks
, LABEL());
10555 free_stack(common
, CURRENT_AS(bracketpos_backtrack
)->stacksize
);
/* Frame case: unwind through revertframes. */
10559 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), CURRENT_AS(bracketpos_backtrack
)->private_data_ptr
);
10560 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
/* Pending backtracks enter after the unwinding code and only drop the
   frame; the fall-through path jumps over that. */
10562 if (current
->topbacktracks
)
10564 jump
= JUMP(SLJIT_JUMP
);
10565 set_jumps(current
->topbacktracks
, LABEL());
10566 /* Drop the stack frame. */
10567 free_stack(common
, CURRENT_AS(bracketpos_backtrack
)->stacksize
);
10570 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), CURRENT_AS(bracketpos_backtrack
)->private_data_ptr
, SLJIT_MEM1(STACK_TOP
), STACK(-CURRENT_AS(bracketpos_backtrack
)->framesize
- 1));
/* Emits the backtracking path of an OP_BRAMINZERO item. The group that
   follows the prefix is compiled at this point, from inside the backtracking
   path.

   The nested lists of `current` are cleared first. If the opcode after the
   prefix is above OP_ASSERTBACK_NOT it is compiled as a bracket (matching
   path, then the backtracking path of the resulting current->top). The other
   visible code compiles an assertion using a zero-initialised local
   assert_backtrack that reuses the recorded matchingpath label.

   NOTE(review): the separator between the two cases is not visible in this
   excerpt - presumably they are mutually exclusive; confirm. */
10573 static SLJIT_INLINE
void compile_braminzero_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
10575 assert_backtrack backtrack
;
10577 current
->top
= NULL
;
10578 current
->topbacktracks
= NULL
;
10579 current
->nextbacktracks
= NULL
;
10580 if (current
->cc
[1] > OP_ASSERTBACK_NOT
)
10582 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
10583 compile_bracket_matchingpath(common
, current
->cc
, current
);
10584 compile_bracket_backtrackingpath(common
, current
->top
);
/* Temporary record for the assertion: only common.cc and matchingpath are
   filled in, every other field stays zero. */
10588 memset(&backtrack
, 0, sizeof(backtrack
));
10589 backtrack
.common
.cc
= current
->cc
;
10590 backtrack
.matchingpath
= CURRENT_AS(braminzero_backtrack
)->matchingpath
;
10591 /* Manual call of compile_assert_matchingpath. */
10592 compile_assert_matchingpath(common
, current
->cc
, &backtrack
, FALSE
);
/* Nothing may be left pending on `current` afterwards. */
10594 SLJIT_ASSERT(!current
->nextbacktracks
&& !current
->topbacktracks
);
/* Emits the backtracking path of a control verb; the opcode is read from
   current->cc.

   OP_THEN / OP_THEN_ARG: with an active common->then_trap, the chain that
   starts at control_head_ptr is followed until an entry is found whose slot 1
   equals type_then_trap and whose slot 2 equals then_trap->start; control
   then goes to the trap's quit list. Otherwise, inside a positive assertion,
   a jump is added to positive_assert_quit.
   Exit code: jumps to quit_label, or adds a jump to the quit list when the
   label is still NULL.
   OP_SKIP_ARG: calls do_search_mark and, for a non-zero result, continues
   through reset_match.
   OP_SKIP (and the remaining visible case): STR_PTR is loaded from stack
   slot 0 or set to zero, followed by a jump to reset_match.

   NOTE(review): the branch structure, the `loop` label assignment and the
   declaration of `compiler` are not visible in this excerpt. */
10597 static SLJIT_INLINE
void compile_control_verb_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
10600 pcre_uchar opcode
= *current
->cc
;
10601 struct sljit_label
*loop
;
10602 struct sljit_jump
*jump
;
10604 if (opcode
== OP_THEN
|| opcode
== OP_THEN_ARG
)
10606 if (common
->then_trap
!= NULL
)
10608 SLJIT_ASSERT(common
->control_head_ptr
!= 0);
/* TMP1/TMP2 hold the entry type and start value searched for; STACK_TOP walks
   the chain by repeatedly loading slot 0 of the current entry. */
10610 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
10611 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_IMM
, type_then_trap
);
10612 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_IMM
, common
->then_trap
->start
);
10613 jump
= JUMP(SLJIT_JUMP
);
10616 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10618 CMPTO(SLJIT_NOT_EQUAL
, SLJIT_MEM1(STACK_TOP
), STACK(1), TMP1
, 0, loop
);
10619 CMPTO(SLJIT_NOT_EQUAL
, SLJIT_MEM1(STACK_TOP
), STACK(2), TMP2
, 0, loop
);
10620 add_jump(compiler
, &common
->then_trap
->quit
, JUMP(SLJIT_JUMP
));
10623 else if (common
->positive_assert
)
10625 add_jump(compiler
, &common
->positive_assert_quit
, JUMP(SLJIT_JUMP
));
10630 if (common
->local_exit
)
10632 if (common
->quit_label
== NULL
)
10633 add_jump(compiler
, &common
->quit
, JUMP(SLJIT_JUMP
));
10635 JUMPTO(SLJIT_JUMP
, common
->quit_label
);
10639 if (opcode
== OP_SKIP_ARG
)
10641 SLJIT_ASSERT(common
->control_head_ptr
!= 0);
/* STACK_TOP is parked in LOCALS0 because its register is reused to pass the
   mark name address (current->cc + 2) to do_search_mark; TMP1 carries the
   control head in and, judging by the move into STR_PTR afterwards, the
   result out (presumably first-argument and return register - TODO confirm). */
10642 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
);
10643 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
, STACK_TOP
, 0);
10644 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_IMM
, (sljit_sw
)(current
->cc
+ 2));
10645 sljit_emit_icall(compiler
, SLJIT_CALL
, SLJIT_RET(SW
) | SLJIT_ARG1(SW
) | SLJIT_ARG2(SW
), SLJIT_IMM
, SLJIT_FUNC_OFFSET(do_search_mark
));
10646 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
10648 OP1(SLJIT_MOV
, STR_PTR
, 0, TMP1
, 0);
10649 add_jump(compiler
, &common
->reset_match
, CMP(SLJIT_NOT_EQUAL
, STR_PTR
, 0, SLJIT_IMM
, 0));
10653 if (opcode
== OP_SKIP
)
10654 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10656 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_IMM
, 0);
10657 add_jump(compiler
, &common
->reset_match
, JUMP(SLJIT_JUMP
));
/* Emits the backtracking path of a then_trap_backtrack record.

   When the record carries a then_trap pointer, common->then_trap is set to
   it. The remaining code computes `size` as 3 plus the frame size (when that
   is non-negative) and generates two entry paths that both end by storing
   TMP1 into control_head_ptr:
     - normal backtrack: TMP1 is read from STACK(size - 3) and `size` slots
       are released;
     - quit path (target of the record's quit jumps): frames are reverted when
       framesize >= 0, TMP1 is read from STACK(0) and 3 slots are released.

   NOTE(review): whether the first case returns early, and the declarations of
   `size` and `compiler`, are not visible in this excerpt - confirm. */
10660 static SLJIT_INLINE
void compile_then_trap_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
10663 struct sljit_jump
*jump
;
10666 if (CURRENT_AS(then_trap_backtrack
)->then_trap
)
10668 common
->then_trap
= CURRENT_AS(then_trap_backtrack
)->then_trap
;
10672 size
= CURRENT_AS(then_trap_backtrack
)->framesize
;
10673 size
= 3 + (size
< 0 ? 0 : size
);
10675 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(size
- 3));
10676 free_stack(common
, size
);
10677 jump
= JUMP(SLJIT_JUMP
);
10679 set_jumps(CURRENT_AS(then_trap_backtrack
)->quit
, LABEL());
10680 /* STACK_TOP is set by THEN. */
10681 if (CURRENT_AS(then_trap_backtrack
)->framesize
>= 0)
10682 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
10683 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10684 free_stack(common
, 3);
10687 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, TMP1
, 0);
/* Emits backtracking code for the backtrack record `current`, dispatching on
   the opcode stored at current->cc to the specialised generators, and then
   moves on to current->prev. common->then_trap is saved on entry and restored
   at the end.

   NOTE(review): the loop construct and a number of case labels are not
   visible in this excerpt; the comments below describe only the statements
   that are shown. */
10690 static void compile_backtrackingpath(compiler_common
*common
, struct backtrack_common
*current
)
10693 then_trap_backtrack
*save_then_trap
= common
->then_trap
;
/* Jumps recorded in nextbacktracks enter at the start of this record's
   code. */
10697 if (current
->nextbacktracks
!= NULL
)
10698 set_jumps(current
->nextbacktracks
, LABEL());
10699 switch(*current
->cc
)
/* Pop one saved value into OVECTOR(0). */
10702 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10703 free_stack(common
, 1);
10704 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0), TMP1
, 0);
10734 case OP_NOTMINSTAR
:
10736 case OP_NOTMINPLUS
:
10738 case OP_NOTMINQUERY
:
10740 case OP_NOTMINUPTO
:
10742 case OP_NOTPOSSTAR
:
10743 case OP_NOTPOSPLUS
:
10744 case OP_NOTPOSQUERY
:
10745 case OP_NOTPOSUPTO
:
10747 case OP_NOTMINSTARI
:
10749 case OP_NOTMINPLUSI
:
10751 case OP_NOTMINQUERYI
:
10753 case OP_NOTMINUPTOI
:
10755 case OP_NOTPOSSTARI
:
10756 case OP_NOTPOSPLUSI
:
10757 case OP_NOTPOSQUERYI
:
10758 case OP_NOTPOSUPTOI
:
10760 case OP_TYPEMINSTAR
:
10762 case OP_TYPEMINPLUS
:
10764 case OP_TYPEMINQUERY
:
10766 case OP_TYPEMINUPTO
:
10768 case OP_TYPEPOSSTAR
:
10769 case OP_TYPEPOSPLUS
:
10770 case OP_TYPEPOSQUERY
:
10771 case OP_TYPEPOSUPTO
:
10774 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
10777 compile_iterator_backtrackingpath(common
, current
);
10784 compile_ref_iterator_backtrackingpath(common
, current
);
10788 compile_recurse_backtrackingpath(common
, current
);
10792 case OP_ASSERT_NOT
:
10793 case OP_ASSERTBACK
:
10794 case OP_ASSERTBACK_NOT
:
10795 compile_assert_backtrackingpath(common
, current
);
10806 compile_bracket_backtrackingpath(common
, current
);
/* The opcode after the prefix decides between bracket and assertion
   handling. */
10810 if (current
->cc
[1] > OP_ASSERTBACK_NOT
)
10811 compile_bracket_backtrackingpath(common
, current
);
10813 compile_assert_backtrackingpath(common
, current
);
10820 case OP_BRAPOSZERO
:
10821 compile_bracketpos_backtrackingpath(common
, current
);
10824 case OP_BRAMINZERO
:
10825 compile_braminzero_backtrackingpath(common
, current
);
/* Restore mark_ptr from the stack (slot 4 with has_skip_arg, slot 0
   otherwise) and, with has_skip_arg, control_head_ptr from slot 0. */
10829 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(common
->has_skip_arg
? 4 : 0));
10830 if (common
->has_skip_arg
)
10831 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10832 free_stack(common
, common
->has_skip_arg
? 5 : 1);
10833 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
, TMP1
, 0);
10834 if (common
->has_skip_arg
)
10835 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, TMP2
, 0);
10844 compile_control_verb_backtrackingpath(common
, current
);
/* Unless local_exit is set, the return register is loaded with
   PCRE_ERROR_NOMATCH; control then leaves through quit_label, or through the
   quit jump list while the label is still NULL. */
10848 if (!common
->local_exit
)
10849 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, PCRE_ERROR_NOMATCH
);
10850 if (common
->quit_label
== NULL
)
10851 add_jump(compiler
, &common
->quit
, JUMP(SLJIT_JUMP
));
10853 JUMPTO(SLJIT_JUMP
, common
->quit_label
);
10859 case OP_ASSERT_ACCEPT
:
10860 set_jumps(current
->topbacktracks
, LABEL());
10864 /* A virtual opcode for then traps. */
10865 compile_then_trap_backtrackingpath(common
, current
);
10869 SLJIT_UNREACHABLE();
10872 current
= current
->prev
;
10874 common
->then_trap
= save_then_trap
;
/* Emits the code for one recursion entry (common->currententry): binds the
   entry label, saves private data on the JIT stack, compiles the matching and
   backtracking paths of the referenced group, and emits the fast return.
   The generated code leaves 1 (accepted) or 0 (failed / quit) in TMP1.
   NOTE(review): some source lines (braces, guards, a few declarations) are
   not visible in this listing; comments describe only the visible statements. */
10877 static SLJIT_INLINE
void compile_recurse(compiler_common
*common
)
/* cc is the first opcode of the recursed group. */
10880 pcre_uchar
*cc
= common
->start
+ common
->currententry
->start
;
/* ccbegin skips the opcode and link, plus IMM2_SIZE unless the group is OP_BRA. */
10881 pcre_uchar
*ccbegin
= cc
+ 1 + LINK_SIZE
+ (*cc
== OP_BRA
? 0 : IMM2_SIZE
);
10882 pcre_uchar
*ccend
= bracketend(cc
) - (1 + LINK_SIZE
);
10883 BOOL needs_control_head
;
10884 int framesize
= get_framesize(common
, cc
, NULL
, TRUE
, &needs_control_head
);
10885 int private_data_size
= get_private_data_copy_length(common
, ccbegin
, ccend
, needs_control_head
);
10886 int alternativesize
;
10888 backtrack_common altbacktrack
;
10889 struct sljit_jump
*jump
;
10891 /* Recurse captures then. */
10892 common
->then_trap
= NULL
;
10894 SLJIT_ASSERT(*cc
== OP_BRA
|| *cc
== OP_CBRA
|| *cc
== OP_CBRAPOS
|| *cc
== OP_SCBRA
|| *cc
== OP_SCBRAPOS
);
10895 needs_frame
= framesize
>= 0;
/* One extra stack slot is reserved when the item at the end of the first
   branch is OP_ALT; it holds STR_PTR (see the store to STACK(0) below). */
10898 alternativesize
= *(cc
+ GET(cc
, 1)) == OP_ALT
? 1 : 0;
10900 SLJIT_ASSERT(common
->currententry
->entry
== NULL
&& common
->recursive_head_ptr
!= 0);
/* Bind the entry label and resolve every call recorded for this entry. */
10901 common
->currententry
->entry
= LABEL();
10902 set_jumps(common
->currententry
->calls
, common
->currententry
->entry
);
/* The return address arrives in TMP2 and is stored in the topmost slot of
   the newly allocated stack area. */
10904 sljit_emit_fast_enter(compiler
, TMP2
, 0);
10905 count_match(common
);
10906 allocate_stack(common
, private_data_size
+ framesize
+ alternativesize
);
10907 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(private_data_size
+ framesize
+ alternativesize
- 1), TMP2
, 0);
/* Save the private data (TRUE = save direction; the matching call with FALSE
   appears before the return sequence). */
10908 copy_private_data(common
, ccbegin
, ccend
, TRUE
, framesize
+ alternativesize
, private_data_size
+ framesize
+ alternativesize
, needs_control_head
);
10909 if (needs_control_head
)
10910 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_IMM
, 0);
/* Remember the current stack top so the quit/accept paths can restore it. */
10911 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->recursive_head_ptr
, STACK_TOP
, 0);
/* NOTE(review): any condition guarding init_frame is not visible here. */
10913 init_frame(common
, cc
, NULL
, framesize
+ alternativesize
- 1, alternativesize
, TRUE
);
10915 if (alternativesize
> 0)
10916 OP1(SLJIT_MOV
, SLJIT_MEM1(STACK_TOP
), STACK(0), STR_PTR
, 0);
/* Start from a clean backtrack record and clear the quit/accept labels and
   jump lists used while compiling this entry. */
10918 memset(&altbacktrack
, 0, sizeof(backtrack_common
));
10919 common
->quit_label
= NULL
;
10920 common
->accept_label
= NULL
;
10921 common
->quit
= NULL
;
10922 common
->accept
= NULL
;
10923 altbacktrack
.cc
= ccbegin
;
10927 altbacktrack
.top
= NULL
;
10928 altbacktrack
.topbacktracks
= NULL
;
/* When not at the first position of the group, STR_PTR is reloaded from STACK(0). */
10930 if (altbacktrack
.cc
!= ccbegin
)
10931 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(STACK_TOP
), STACK(0));
10933 compile_matchingpath(common
, altbacktrack
.cc
, cc
, &altbacktrack
);
10934 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
/* Falling out of the matching path means success: jump to the accept code. */
10937 add_jump(compiler
, &common
->accept
, JUMP(SLJIT_JUMP
));
10939 compile_backtrackingpath(common
, altbacktrack
.top
);
10940 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
10942 set_jumps(altbacktrack
.topbacktracks
, LABEL());
10947 altbacktrack
.cc
= cc
+ 1 + LINK_SIZE
;
10951 /* None of them matched. */
10952 OP1(SLJIT_MOV
, TMP3
, 0, SLJIT_IMM
, 0);
10953 jump
= JUMP(SLJIT_JUMP
);
/* Quit path: restore STACK_TOP from recursive_head_ptr, call the shared
   revertframes routine with STACK_TOP temporarily moved up by
   (framesize + alternativesize) words, and report failure (TMP3 = 0). */
10955 if (common
->quit
!= NULL
)
10957 set_jumps(common
->quit
, LABEL());
10958 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), common
->recursive_head_ptr
);
10961 OP2(SLJIT_ADD
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, (framesize
+ alternativesize
) * sizeof(sljit_sw
));
10962 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
10963 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, (framesize
+ alternativesize
) * sizeof(sljit_sw
));
10965 OP1(SLJIT_MOV
, TMP3
, 0, SLJIT_IMM
, 0);
10966 common
->quit
= NULL
;
10967 add_jump(compiler
, &common
->quit
, JUMP(SLJIT_JUMP
));
/* Accept path: same stack restore and frame revert, but report success (TMP3 = 1). */
10970 set_jumps(common
->accept
, LABEL());
10971 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), common
->recursive_head_ptr
);
10974 OP2(SLJIT_ADD
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, (framesize
+ alternativesize
) * sizeof(sljit_sw
));
10975 add_jump(compiler
, &common
->revertframes
, JUMP(SLJIT_FAST_CALL
));
10976 OP2(SLJIT_SUB
, STACK_TOP
, 0, STACK_TOP
, 0, SLJIT_IMM
, (framesize
+ alternativesize
) * sizeof(sljit_sw
));
10978 OP1(SLJIT_MOV
, TMP3
, 0, SLJIT_IMM
, 1);
/* Common exit: restore the saved private data and release the stack area. */
10981 if (common
->quit
!= NULL
)
10982 set_jumps(common
->quit
, LABEL());
10983 copy_private_data(common
, ccbegin
, ccend
, FALSE
, framesize
+ alternativesize
, private_data_size
+ framesize
+ alternativesize
, needs_control_head
);
10984 free_stack(common
, private_data_size
+ framesize
+ alternativesize
);
/* After free_stack the saved values sit just below STACK_TOP: the previous
   recursive head and control head are reloaded from negative STACK() slots,
   and the result in TMP3 is moved to TMP1. */
10985 if (needs_control_head
)
10987 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-3));
10988 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-2));
10989 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->recursive_head_ptr
, TMP1
, 0);
10990 OP1(SLJIT_MOV
, TMP1
, 0, TMP3
, 0);
10991 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, TMP2
, 0);
/* Variant without a control head: only recursive_head_ptr is restored. */
10995 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(STACK_TOP
), STACK(-2));
10996 OP1(SLJIT_MOV
, TMP1
, 0, TMP3
, 0);
10997 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->recursive_head_ptr
, TMP2
, 0);
/* Return through the address stored in the topmost slot at entry. */
10999 sljit_emit_fast_return(compiler
, SLJIT_MEM1(STACK_TOP
), STACK(-1));
11002 #undef COMPILE_BACKTRACKINGPATH
/* Compiles the pattern `re` to machine code for the given `mode`
   (JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE or JIT_PARTIAL_HARD_COMPILE) and
   stores the result in the executable_functions descriptor attached to
   `extra` (extra->executable_jit, flag PCRE_EXTRA_EXECUTABLE_JIT).
   On the visible failure paths the temporary tables are released and no
   function is stored.
   NOTE(review): several source lines (braces, else branches, early returns,
   some declarations) are not visible in this listing; comments describe only
   the visible statements. */
11006 PRIV(jit_compile
)(const REAL_PCRE
*re
, PUBL(extra
) *extra
, int mode
)
11008 struct sljit_compiler
*compiler
;
11009 backtrack_common rootbacktrack
;
11010 compiler_common common_data
;
11011 compiler_common
*common
= &common_data
;
11012 const sljit_u8
*tables
= re
->tables
;
11013 pcre_study_data
*study
;
11014 int private_data_size
;
11016 executable_functions
*functions
;
11017 void *executable_func
;
11018 sljit_uw executable_size
;
11019 sljit_uw total_length
;
11020 label_addr_list
*label_addr
;
11021 struct sljit_label
*mainloop_label
= NULL
;
11022 struct sljit_label
*continue_match_label
;
11023 struct sljit_label
*empty_match_found_label
= NULL
;
11024 struct sljit_label
*empty_match_backtrack_label
= NULL
;
11025 struct sljit_label
*reset_match_label
;
11026 struct sljit_label
*quit_label
;
11027 struct sljit_jump
*jump
;
11028 struct sljit_jump
*minlength_check_failed
= NULL
;
11029 struct sljit_jump
*reqbyte_notfound
= NULL
;
11030 struct sljit_jump
*empty_match
= NULL
;
/* Study data must be present; minlength and start_bits are read from it below. */
11032 SLJIT_ASSERT((extra
->flags
& PCRE_EXTRA_STUDY_DATA
) != 0);
11033 study
= extra
->study_data
;
/* Use the built-in default tables (the guarding condition is not visible in this listing). */
11036 tables
= PRIV(default_tables
);
11038 memset(&rootbacktrack
, 0, sizeof(backtrack_common
));
11039 memset(common
, 0, sizeof(compiler_common
));
/* The first opcode follows the name table inside the compiled pattern block. */
11040 rootbacktrack
.cc
= (pcre_uchar
*)re
+ re
->name_table_offset
+ re
->name_count
* re
->name_entry_size
;
11042 common
->start
= rootbacktrack
.cc
;
11043 common
->read_only_data_head
= NULL
;
11044 common
->fcc
= tables
+ fcc_offset
;
11045 common
->lcc
= (sljit_sw
)(tables
+ lcc_offset
);
11046 common
->mode
= mode
;
11047 common
->might_be_empty
= study
->minlength
== 0;
/* Translate the newline option bits into common->newline and common->nltype;
   the CR+LF pair is packed as (CHAR_CR << 8) | CHAR_NL. nltype stays
   NLTYPE_FIXED unless a case below overrides it. */
11048 common
->nltype
= NLTYPE_FIXED
;
11049 switch(re
->options
& PCRE_NEWLINE_BITS
)
11052 /* Compile-time default */
11055 case -1: common
->newline
= (CHAR_CR
<< 8) | CHAR_NL
; common
->nltype
= NLTYPE_ANY
; break;
11056 case -2: common
->newline
= (CHAR_CR
<< 8) | CHAR_NL
; common
->nltype
= NLTYPE_ANYCRLF
; break;
11057 default: common
->newline
= NEWLINE
; break;
11060 case PCRE_NEWLINE_CR
: common
->newline
= CHAR_CR
; break;
11061 case PCRE_NEWLINE_LF
: common
->newline
= CHAR_NL
; break;
11062 case PCRE_NEWLINE_CR
+
11063 PCRE_NEWLINE_LF
: common
->newline
= (CHAR_CR
<< 8) | CHAR_NL
; break;
11064 case PCRE_NEWLINE_ANY
: common
->newline
= (CHAR_CR
<< 8) | CHAR_NL
; common
->nltype
= NLTYPE_ANY
; break;
11065 case PCRE_NEWLINE_ANYCRLF
: common
->newline
= (CHAR_CR
<< 8) | CHAR_NL
; common
->nltype
= NLTYPE_ANYCRLF
; break;
11068 common
->nlmax
= READ_CHAR_MAX
;
/* bsr_nltype comes from the PCRE_BSR_* option bits when one is set; the two
   unconditional-looking assignments after that are presumably compile-time
   defaults whose preprocessor guards are not visible here. */
11070 if ((re
->options
& PCRE_BSR_ANYCRLF
) != 0)
11071 common
->bsr_nltype
= NLTYPE_ANYCRLF
;
11072 else if ((re
->options
& PCRE_BSR_UNICODE
) != 0)
11073 common
->bsr_nltype
= NLTYPE_ANY
;
11077 common
->bsr_nltype
= NLTYPE_ANYCRLF
;
11079 common
->bsr_nltype
= NLTYPE_ANY
;
11082 common
->bsr_nlmax
= READ_CHAR_MAX
;
11083 common
->bsr_nlmin
= 0;
11084 common
->endonly
= (re
->options
& PCRE_DOLLAR_ENDONLY
) != 0;
11085 common
->ctypes
= (sljit_sw
)(tables
+ ctypes_offset
);
11086 common
->name_table
= ((pcre_uchar
*)re
) + re
->name_table_offset
;
11087 common
->name_count
= re
->name_count
;
11088 common
->name_entry_size
= re
->name_entry_size
;
11089 common
->jscript_compat
= (re
->options
& PCRE_JAVASCRIPT_COMPAT
) != 0;
11091 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
11092 common
->utf
= (re
->options
& PCRE_UTF8
) != 0;
11094 common
->use_ucp
= (re
->options
& PCRE_UCP
) != 0;
/* Narrow the newline bounds (nlmin/nlmax, bsr_nlmin/bsr_nlmax) according to
   the newline types chosen above; 0x2029 is the upper bound for NLTYPE_ANY.
   This section ends at the SUPPORT_UTF #endif below. */
11098 if (common
->nltype
== NLTYPE_ANY
)
11099 common
->nlmax
= 0x2029;
11100 else if (common
->nltype
== NLTYPE_ANYCRLF
)
11101 common
->nlmax
= (CHAR_CR
> CHAR_NL
) ? CHAR_CR
: CHAR_NL
;
11104 /* We only care about the first newline character. */
11105 common
->nlmax
= common
->newline
& 0xff;
11108 if (common
->nltype
== NLTYPE_FIXED
)
11109 common
->nlmin
= common
->newline
& 0xff;
11111 common
->nlmin
= (CHAR_CR
< CHAR_NL
) ? CHAR_CR
: CHAR_NL
;
11113 if (common
->bsr_nltype
== NLTYPE_ANY
)
11114 common
->bsr_nlmax
= 0x2029;
11116 common
->bsr_nlmax
= (CHAR_CR
> CHAR_NL
) ? CHAR_CR
: CHAR_NL
;
11117 common
->bsr_nlmin
= (CHAR_CR
< CHAR_NL
) ? CHAR_CR
: CHAR_NL
;
11119 #endif /* SUPPORT_UTF */
11120 ccend
= bracketend(common
->start
);
11122 /* Calculate the local space size on the stack. */
11123 common
->ovector_start
= LIMIT_MATCH
+ sizeof(sljit_sw
);
/* One byte per capturing bracket (top_bracket + 1 entries), filled with 0 or
   1 by the memsets below depending on DEBUG_FORCE_UNOPTIMIZED_CBRAS. */
11124 common
->optimized_cbracket
= (sljit_u8
*)SLJIT_MALLOC(re
->top_bracket
+ 1, compiler
->allocator_data
);
11125 if (!common
->optimized_cbracket
)
11127 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
11128 memset(common
->optimized_cbracket
, 0, re
->top_bracket
+ 1);
11130 memset(common
->optimized_cbracket
, 1, re
->top_bracket
+ 1);
11133 SLJIT_ASSERT(*common
->start
== OP_BRA
&& ccend
[-(1 + LINK_SIZE
)] == OP_KET
);
11134 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
11135 common
->capture_last_ptr
= common
->ovector_start
;
11136 common
->ovector_start
+= sizeof(sljit_sw
);
/* When check_opcode_types() fails the bracket map is released (the early
   exit itself is not visible in this listing). */
11138 if (!check_opcode_types(common
, common
->start
, ccend
))
11140 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11144 /* Checking flags and updating ovector_start. */
/* Each optional slot below takes the current ovector_start offset and then
   advances it by one sljit_sw (hit_start advances it by two). */
11145 if (mode
== JIT_COMPILE
&& (re
->flags
& PCRE_REQCHSET
) != 0 && (re
->options
& PCRE_NO_START_OPTIMIZE
) == 0)
11147 common
->req_char_ptr
= common
->ovector_start
;
11148 common
->ovector_start
+= sizeof(sljit_sw
);
11150 if (mode
!= JIT_COMPILE
)
11152 common
->start_used_ptr
= common
->ovector_start
;
11153 common
->ovector_start
+= sizeof(sljit_sw
);
11154 if (mode
== JIT_PARTIAL_SOFT_COMPILE
)
11156 common
->hit_start
= common
->ovector_start
;
11157 common
->ovector_start
+= 2 * sizeof(sljit_sw
);
11160 if ((re
->options
& PCRE_FIRSTLINE
) != 0)
11162 common
->match_end_ptr
= common
->ovector_start
;
11163 common
->ovector_start
+= sizeof(sljit_sw
);
11165 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
11166 common
->control_head_ptr
= 1;
/* A non-zero control_head_ptr at this point only marks that a slot is needed;
   it is replaced by the real offset here. */
11168 if (common
->control_head_ptr
!= 0)
11170 common
->control_head_ptr
= common
->ovector_start
;
11171 common
->ovector_start
+= sizeof(sljit_sw
);
11173 if (common
->has_set_som
)
11175 /* Saving the real start pointer is necessary. */
11176 common
->start_ptr
= common
->ovector_start
;
11177 common
->ovector_start
+= sizeof(sljit_sw
);
11180 /* Aligning ovector to even number of sljit words. */
11181 if ((common
->ovector_start
& sizeof(sljit_sw
)) != 0)
11182 common
->ovector_start
+= sizeof(sljit_sw
);
/* Without a dedicated slot, the start pointer shares OVECTOR(0). */
11184 if (common
->start_ptr
== 0)
11185 common
->start_ptr
= OVECTOR(0);
11187 /* Capturing brackets cannot be optimized if callouts are allowed. */
11188 if (common
->capture_last_ptr
!= 0)
11189 memset(common
->optimized_cbracket
, 0, re
->top_bracket
+ 1);
11191 SLJIT_ASSERT(!(common
->req_char_ptr
!= 0 && common
->start_used_ptr
!= 0));
11192 common
->cbra_ptr
= OVECTOR_START
+ (re
->top_bracket
+ 1) * 2 * sizeof(sljit_sw
);
/* private_data_ptrs holds one sljit_s32 per code unit of the pattern; when
   common->has_then is set one extra byte per code unit is allocated, which
   is used as then_offsets further below. */
11194 total_length
= ccend
- common
->start
;
11195 common
->private_data_ptrs
= (sljit_s32
*)SLJIT_MALLOC(total_length
* (sizeof(sljit_s32
) + (common
->has_then
? 1 : 0)), compiler
->allocator_data
);
11196 if (!common
->private_data_ptrs
)
11198 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11201 memset(common
->private_data_ptrs
, 0, total_length
* sizeof(sljit_s32
));
11203 private_data_size
= common
->cbra_ptr
+ (re
->top_bracket
+ 1) * sizeof(sljit_sw
);
11204 set_private_data_ptrs(common
, &private_data_size
, ccend
);
/* Start optimisations are only attempted for unanchored patterns compiled
   without PCRE_NO_START_OPTIMIZE. */
11205 if ((re
->options
& PCRE_ANCHORED
) == 0 && (re
->options
& PCRE_NO_START_OPTIMIZE
) == 0)
11207 if (!detect_fast_forward_skip(common
, &private_data_size
) && !common
->has_skip_in_assert_back
)
11208 detect_fast_fail(common
, common
->start
, &private_data_size
, 4);
11211 SLJIT_ASSERT(common
->fast_fail_start_ptr
<= common
->fast_fail_end_ptr
);
/* A local area larger than SLJIT_MAX_LOCAL_SIZE is rejected; both tables are released. */
11213 if (private_data_size
> SLJIT_MAX_LOCAL_SIZE
)
11215 SLJIT_FREE(common
->private_data_ptrs
, compiler
->allocator_data
);
11216 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11220 if (common
->has_then
)
11222 common
->then_offsets
= (sljit_u8
*)(common
->private_data_ptrs
+ total_length
);
11223 memset(common
->then_offsets
, 0, total_length
);
11224 set_then_offsets(common
, common
->start
, NULL
);
/* Create the sljit compiler; the two frees that follow presumably belong to
   the creation-failure path (its condition is not visible in this listing). */
11227 compiler
= sljit_create_compiler(NULL
);
11230 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11231 SLJIT_FREE(common
->private_data_ptrs
, compiler
->allocator_data
);
11234 common
->compiler
= compiler
;
11236 /* Main pcre_jit_exec entry. */
11237 sljit_emit_enter(compiler
, 0, SLJIT_ARG1(SW
), 5, 5, 0, 0, private_data_size
);
11239 /* Register init. */
11240 reset_ovector(common
, (re
->top_bracket
+ 1) * 2);
11241 if (common
->req_char_ptr
!= 0)
11242 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->req_char_ptr
, SLJIT_R0
, 0);
/* Load the subject pointers, match limit and stack bounds from the
   jit_arguments structure passed in SLJIT_S0; the stored limit is
   limit_match + 1. */
11244 OP1(SLJIT_MOV
, ARGUMENTS
, 0, SLJIT_S0
, 0);
11245 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_S0
, 0);
11246 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, str
));
11247 OP1(SLJIT_MOV
, STR_END
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, end
));
11248 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, stack
));
11249 OP1(SLJIT_MOV_U32
, TMP1
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, limit_match
));
11250 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(struct sljit_stack
, end
));
11251 OP1(SLJIT_MOV
, STACK_LIMIT
, 0, SLJIT_MEM1(TMP2
), SLJIT_OFFSETOF(struct sljit_stack
, start
));
11252 OP2(SLJIT_ADD
, TMP1
, 0, TMP1
, 0, SLJIT_IMM
, 1);
11253 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LIMIT_MATCH
, TMP1
, 0);
11255 if (common
->fast_fail_start_ptr
< common
->fast_fail_end_ptr
)
11256 reset_fast_fail(common
);
/* Initial values of the optional slots: hit_start = -1, mark and control head = 0. */
11258 if (mode
== JIT_PARTIAL_SOFT_COMPILE
)
11259 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, SLJIT_IMM
, -1);
11260 if (common
->mark_ptr
!= 0)
11261 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->mark_ptr
, SLJIT_IMM
, 0);
11262 if (common
->control_head_ptr
!= 0)
11263 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->control_head_ptr
, SLJIT_IMM
, 0);
11265 /* Main part of the matching */
11266 if ((re
->options
& PCRE_ANCHORED
) == 0)
11268 mainloop_label
= mainloop_entry(common
, (re
->flags
& PCRE_HASCRORLF
) != 0);
11269 continue_match_label
= LABEL();
11270 /* Forward search if possible. */
11271 if ((re
->options
& PCRE_NO_START_OPTIMIZE
) == 0)
11273 if (mode
== JIT_COMPILE
&& fast_forward_first_n_chars(common
))
11275 else if ((re
->flags
& PCRE_FIRSTSET
) != 0)
11276 fast_forward_first_char(common
, (pcre_uchar
)re
->first_char
, (re
->flags
& PCRE_FCH_CASELESS
) != 0);
11277 else if ((re
->flags
& PCRE_STARTLINE
) != 0)
11278 fast_forward_newline(common
);
11279 else if (study
!= NULL
&& (study
->flags
& PCRE_STUDY_MAPPED
) != 0)
11280 fast_forward_start_bits(common
, study
->start_bits
);
11284 continue_match_label
= LABEL();
/* Fail early with PCRE_ERROR_NOMATCH when fewer than minlength characters
   remain; the jump is bound to forced_quit_label later. */
11286 if (mode
== JIT_COMPILE
&& study
->minlength
> 0 && (re
->options
& PCRE_NO_START_OPTIMIZE
) == 0)
11288 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, PCRE_ERROR_NOMATCH
);
11289 OP2(SLJIT_ADD
, TMP2
, 0, STR_PTR
, 0, SLJIT_IMM
, IN_UCHARS(study
->minlength
));
11290 minlength_check_failed
= CMP(SLJIT_GREATER
, TMP2
, 0, STR_END
, 0);
11292 if (common
->req_char_ptr
!= 0)
11293 reqbyte_notfound
= search_requested_char(common
, (pcre_uchar
)re
->req_char
, (re
->flags
& PCRE_RCH_CASELESS
) != 0, (re
->flags
& PCRE_FIRSTSET
) != 0);
11295 /* Store the current STR_PTR in OVECTOR(0). */
11296 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0), STR_PTR
, 0);
11297 /* Copy the limit of allowed recursions. */
11298 OP1(SLJIT_MOV
, COUNT_MATCH
, 0, SLJIT_MEM1(SLJIT_SP
), LIMIT_MATCH
);
11299 if (common
->capture_last_ptr
!= 0)
11300 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->capture_last_ptr
, SLJIT_IMM
, -1);
11301 if (common
->fast_forward_bc_ptr
!= NULL
)
11302 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), PRIVATE_DATA(common
->fast_forward_bc_ptr
+ 1), STR_PTR
, 0);
11304 if (common
->start_ptr
!= OVECTOR(0))
11305 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->start_ptr
, STR_PTR
, 0);
11307 /* Copy the beginning of the string. */
11308 if (mode
== JIT_PARTIAL_SOFT_COMPILE
)
11310 jump
= CMP(SLJIT_NOT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, SLJIT_IMM
, -1);
11311 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0);
11312 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
+ sizeof(sljit_sw
), STR_PTR
, 0);
11315 else if (mode
== JIT_PARTIAL_HARD_COMPILE
)
11316 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, STR_PTR
, 0);
/* Matching path of the whole pattern; on a compiler error everything
   allocated so far is released. */
11318 compile_matchingpath(common
, common
->start
, ccend
, &rootbacktrack
);
11319 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
11321 sljit_free_compiler(compiler
);
11322 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11323 SLJIT_FREE(common
->private_data_ptrs
, compiler
->allocator_data
);
11324 free_read_only_data(common
->read_only_data_head
, compiler
->allocator_data
);
/* A zero-length match (STR_PTR still equal to OVECTOR(0)) is diverted to the
   notempty checks emitted near the end of this function. */
11328 if (common
->might_be_empty
)
11330 empty_match
= CMP(SLJIT_EQUAL
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
), OVECTOR(0));
11331 empty_match_found_label
= LABEL();
11334 common
->accept_label
= LABEL();
11335 if (common
->accept
!= NULL
)
11336 set_jumps(common
->accept
, common
->accept_label
);
11338 /* This means we have a match. Update the ovector. */
11339 copy_ovector(common
, re
->top_bracket
+ 1);
/* Single exit point: quit, forced quit and the minlength failure all land
   here and return the value already in SLJIT_RETURN_REG. */
11340 common
->quit_label
= common
->forced_quit_label
= LABEL();
11341 if (common
->quit
!= NULL
)
11342 set_jumps(common
->quit
, common
->quit_label
);
11343 if (common
->forced_quit
!= NULL
)
11344 set_jumps(common
->forced_quit
, common
->forced_quit_label
);
11345 if (minlength_check_failed
!= NULL
)
11346 SET_LABEL(minlength_check_failed
, common
->forced_quit_label
);
11347 sljit_emit_return(compiler
, SLJIT_MOV
, SLJIT_RETURN_REG
, 0);
/* Partial matching modes get a dedicated exit emitted by return_with_partial_match(). */
11349 if (mode
!= JIT_COMPILE
)
11351 common
->partialmatchlabel
= LABEL();
11352 set_jumps(common
->partialmatch
, common
->partialmatchlabel
);
11353 return_with_partial_match(common
, common
->quit_label
);
/* Backtracking path of the whole pattern; same cleanup on compiler error. */
11356 if (common
->might_be_empty
)
11357 empty_match_backtrack_label
= LABEL();
11358 compile_backtrackingpath(common
, rootbacktrack
.top
);
11359 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
11361 sljit_free_compiler(compiler
);
11362 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11363 SLJIT_FREE(common
->private_data_ptrs
, compiler
->allocator_data
);
11364 free_read_only_data(common
->read_only_data_head
, compiler
->allocator_data
);
11368 SLJIT_ASSERT(rootbacktrack
.prev
== NULL
);
11369 reset_match_label
= LABEL();
11371 if (mode
== JIT_PARTIAL_SOFT_COMPILE
)
11373 /* Update hit_start only the first time. */
11374 jump
= CMP(SLJIT_NOT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, SLJIT_IMM
, 0);
11375 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
);
11376 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->start_used_ptr
, SLJIT_IMM
, -1);
11377 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, TMP1
, 0);
11381 /* Check we have remaining characters. */
11382 if ((re
->options
& PCRE_ANCHORED
) == 0 && (re
->options
& PCRE_FIRSTLINE
) != 0)
11384 SLJIT_ASSERT(common
->match_end_ptr
!= 0);
11385 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), common
->match_end_ptr
);
/* Reload the start position of the attempt that just failed. */
11388 OP1(SLJIT_MOV
, STR_PTR
, 0, SLJIT_MEM1(SLJIT_SP
),
11389 (common
->fast_forward_bc_ptr
!= NULL
) ? (PRIVATE_DATA(common
->fast_forward_bc_ptr
+ 1)) : common
->start_ptr
);
/* Unanchored patterns loop back to the main loop (or the newline shortcut)
   while STR_PTR is below the end limit. */
11391 if ((re
->options
& PCRE_ANCHORED
) == 0)
11393 if (common
->ff_newline_shortcut
!= NULL
)
11395 if ((re
->options
& PCRE_FIRSTLINE
) == 0)
11396 CMPTO(SLJIT_LESS
, STR_PTR
, 0, STR_END
, 0, common
->ff_newline_shortcut
);
11397 /* There cannot be more newlines here. */
11400 CMPTO(SLJIT_LESS
, STR_PTR
, 0, ((re
->options
& PCRE_FIRSTLINE
) == 0) ? STR_END
: TMP1
, 0, mainloop_label
);
11403 /* No more remaining characters. */
11404 if (reqbyte_notfound
!= NULL
)
11405 JUMPHERE(reqbyte_notfound
);
11407 if (mode
== JIT_PARTIAL_SOFT_COMPILE
)
11408 CMPTO(SLJIT_NOT_EQUAL
, SLJIT_MEM1(SLJIT_SP
), common
->hit_start
, SLJIT_IMM
, -1, common
->partialmatchlabel
);
11410 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, PCRE_ERROR_NOMATCH
);
11411 JUMPTO(SLJIT_JUMP
, common
->quit_label
);
11413 flush_stubs(common
);
/* Empty-match handling: backtrack if notempty is set, or if notempty_atstart
   is set and the match is at arguments->str; otherwise accept the match. */
11415 if (common
->might_be_empty
)
11417 JUMPHERE(empty_match
);
11418 OP1(SLJIT_MOV
, TMP1
, 0, ARGUMENTS
, 0);
11419 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, notempty
));
11420 CMPTO(SLJIT_NOT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0, empty_match_backtrack_label
);
11421 OP1(SLJIT_MOV_U8
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, notempty_atstart
));
11422 CMPTO(SLJIT_EQUAL
, TMP2
, 0, SLJIT_IMM
, 0, empty_match_found_label
);
11423 OP1(SLJIT_MOV
, TMP2
, 0, SLJIT_MEM1(TMP1
), SLJIT_OFFSETOF(jit_arguments
, str
));
11424 CMPTO(SLJIT_NOT_EQUAL
, TMP2
, 0, STR_PTR
, 0, empty_match_found_label
);
11425 JUMPTO(SLJIT_JUMP
, empty_match_backtrack_label
);
/* Compile every pending recursion entry with local_exit set. quit_label is
   saved and restored around the loop because compile_recurse() resets it. */
11428 common
->fast_forward_bc_ptr
= NULL
;
11429 common
->fast_fail_start_ptr
= 0;
11430 common
->fast_fail_end_ptr
= 0;
11431 common
->currententry
= common
->entries
;
11432 common
->local_exit
= TRUE
;
11433 quit_label
= common
->quit_label
;
11434 while (common
->currententry
!= NULL
)
11436 /* Might add new entries. */
11437 compile_recurse(common
);
11438 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler
)))
11440 sljit_free_compiler(compiler
);
11441 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11442 SLJIT_FREE(common
->private_data_ptrs
, compiler
->allocator_data
);
11443 free_read_only_data(common
->read_only_data_head
, compiler
->allocator_data
);
11446 flush_stubs(common
);
11447 common
->currententry
= common
->currententry
->next
;
11449 common
->local_exit
= FALSE
;
11450 common
->quit_label
= quit_label
;
11452 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
11453 /* This is a (really) rare case. */
11454 set_jumps(common
->stackalloc
, LABEL());
11455 /* RETURN_ADDR is not a saved register. */
11456 sljit_emit_fast_enter(compiler
, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
11458 SLJIT_ASSERT(TMP1
== SLJIT_R0
&& STACK_TOP
== SLJIT_R1
);
/* Call sljit_stack_resize(arguments->stack, STACK_LIMIT - STACK_GROWTH_RATE);
   STACK_TOP is parked in LOCALS1 across the call. */
11460 OP1(SLJIT_MOV
, SLJIT_MEM1(SLJIT_SP
), LOCALS1
, STACK_TOP
, 0);
11461 OP1(SLJIT_MOV
, SLJIT_R0
, 0, ARGUMENTS
, 0);
11462 OP2(SLJIT_SUB
, SLJIT_R1
, 0, STACK_LIMIT
, 0, SLJIT_IMM
, STACK_GROWTH_RATE
);
11463 OP1(SLJIT_MOV
, SLJIT_R0
, 0, SLJIT_MEM1(SLJIT_R0
), SLJIT_OFFSETOF(jit_arguments
, stack
));
11464 OP1(SLJIT_MOV
, STACK_LIMIT
, 0, TMP2
, 0);
11466 sljit_emit_icall(compiler
, SLJIT_CALL
, SLJIT_RET(SW
) | SLJIT_ARG1(SW
) | SLJIT_ARG2(SW
), SLJIT_IMM
, SLJIT_FUNC_OFFSET(sljit_stack_resize
));
/* A zero result is treated as allocation failure; otherwise the result
   becomes the new STACK_LIMIT and control returns to the caller. */
11467 jump
= CMP(SLJIT_EQUAL
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, 0);
11468 OP1(SLJIT_MOV
, TMP2
, 0, STACK_LIMIT
, 0);
11469 OP1(SLJIT_MOV
, STACK_LIMIT
, 0, SLJIT_RETURN_REG
, 0);
11470 OP1(SLJIT_MOV
, TMP1
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS0
);
11471 OP1(SLJIT_MOV
, STACK_TOP
, 0, SLJIT_MEM1(SLJIT_SP
), LOCALS1
);
11472 sljit_emit_fast_return(compiler
, TMP1
, 0);
11474 /* Allocation failed. */
11476 /* We break the return address cache here, but this is a really rare case. */
11477 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, PCRE_ERROR_JIT_STACKLIMIT
);
11478 JUMPTO(SLJIT_JUMP
, common
->quit_label
);
11480 /* Call limit reached. */
11481 set_jumps(common
->calllimit
, LABEL());
11482 OP1(SLJIT_MOV
, SLJIT_RETURN_REG
, 0, SLJIT_IMM
, PCRE_ERROR_MATCHLIMIT
);
11483 JUMPTO(SLJIT_JUMP
, common
->quit_label
);
/* Each shared helper routine below is emitted only if some jump list refers to it. */
11485 if (common
->revertframes
!= NULL
)
11487 set_jumps(common
->revertframes
, LABEL());
11488 do_revertframes(common
);
11490 if (common
->wordboundary
!= NULL
)
11492 set_jumps(common
->wordboundary
, LABEL());
11493 check_wordboundary(common
);
11495 if (common
->anynewline
!= NULL
)
11497 set_jumps(common
->anynewline
, LABEL());
11498 check_anynewline(common
);
11500 if (common
->hspace
!= NULL
)
11502 set_jumps(common
->hspace
, LABEL());
11503 check_hspace(common
);
11505 if (common
->vspace
!= NULL
)
11507 set_jumps(common
->vspace
, LABEL());
11508 check_vspace(common
);
11510 if (common
->casefulcmp
!= NULL
)
11512 set_jumps(common
->casefulcmp
, LABEL());
11513 do_casefulcmp(common
);
11515 if (common
->caselesscmp
!= NULL
)
11517 set_jumps(common
->caselesscmp
, LABEL());
11518 do_caselesscmp(common
);
11520 if (common
->reset_match
!= NULL
)
11522 set_jumps(common
->reset_match
, LABEL());
11523 do_reset_match(common
, (re
->top_bracket
+ 1) * 2);
11524 CMPTO(SLJIT_GREATER
, STR_PTR
, 0, TMP1
, 0, continue_match_label
);
11525 OP1(SLJIT_MOV
, STR_PTR
, 0, TMP1
, 0);
11526 JUMPTO(SLJIT_JUMP
, reset_match_label
);
11529 #ifdef COMPILE_PCRE8
11530 if (common
->utfreadchar
!= NULL
)
11532 set_jumps(common
->utfreadchar
, LABEL());
11533 do_utfreadchar(common
);
11535 if (common
->utfreadchar16
!= NULL
)
11537 set_jumps(common
->utfreadchar16
, LABEL());
11538 do_utfreadchar16(common
);
11540 if (common
->utfreadtype8
!= NULL
)
11542 set_jumps(common
->utfreadtype8
, LABEL());
11543 do_utfreadtype8(common
);
11545 #endif /* COMPILE_PCRE8 */
11546 #endif /* SUPPORT_UTF */
11548 if (common
->getucd
!= NULL
)
11550 set_jumps(common
->getucd
, LABEL());
/* Release the compile-time tables before generating the final code. */
11555 SLJIT_FREE(common
->optimized_cbracket
, compiler
->allocator_data
);
11556 SLJIT_FREE(common
->private_data_ptrs
, compiler
->allocator_data
);
11558 executable_func
= sljit_generate_code(compiler
);
11559 executable_size
= sljit_get_generated_code_size(compiler
);
/* Write the final address of every recorded label to its update_addr location. */
11560 label_addr
= common
->label_addrs
;
11561 while (label_addr
!= NULL
)
11563 *label_addr
->update_addr
= sljit_get_label_addr(label_addr
->label
);
11564 label_addr
= label_addr
->next
;
11566 sljit_free_compiler(compiler
);
11567 if (executable_func
== NULL
)
11569 free_read_only_data(common
->read_only_data_head
, compiler
->allocator_data
);
11573 /* Reuse the function descriptor if possible. */
11574 if ((extra
->flags
& PCRE_EXTRA_EXECUTABLE_JIT
) != 0 && extra
->executable_jit
!= NULL
)
11575 functions
= (executable_functions
*)extra
->executable_jit
;
11578 /* Note: If your memory-checker has flagged the allocation below as a
11579 * memory leak, it is probably because you either forgot to call
11580 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
11581 * pcre16_extra) object, or you called said function after having
11582 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
11583 * of the object. (The function will only free the JIT data if the
11584 * bit remains set, as the bit indicates that the pointer to the data
11587 functions
= SLJIT_MALLOC(sizeof(executable_functions
), compiler
->allocator_data
);
11588 if (functions
== NULL
)
11590 /* This case is highly unlikely since we just recently
11591 freed a lot of memory. Not impossible though. */
11592 sljit_free_code(executable_func
);
11593 free_read_only_data(common
->read_only_data_head
, compiler
->allocator_data
);
/* Fresh descriptor: top_bracket stores (re->top_bracket + 1) * 2 and
   limit_match stores the pattern's own limit (0 when PCRE_MLSET is not set);
   the descriptor is then attached to extra. */
11596 memset(functions
, 0, sizeof(executable_functions
));
11597 functions
->top_bracket
= (re
->top_bracket
+ 1) * 2;
11598 functions
->limit_match
= (re
->flags
& PCRE_MLSET
) != 0 ? re
->limit_match
: 0;
11599 extra
->executable_jit
= functions
;
11600 extra
->flags
|= PCRE_EXTRA_EXECUTABLE_JIT
;
/* Store the generated code, its read-only data and its size in the slots for this mode. */
11603 functions
->executable_funcs
[mode
] = executable_func
;
11604 functions
->read_only_data_heads
[mode
] = common
->read_only_data_head
;
11605 functions
->executable_sizes
[mode
] = executable_size
;
/* Runs the compiled function with a temporary JIT stack of MACHINE_STACK_SIZE
   bytes held in a local array of this function, and returns the compiled
   function's result. arguments->stack is overwritten with a pointer to that
   local stack descriptor, so it must not be used after this call returns.
   NOTE(review): the line opening the convert_executable_func declaration is
   not visible in this listing. */
11608 static SLJIT_NOINLINE
int jit_machine_stack_exec(jit_arguments
*arguments
, void *executable_func
)
/* convert_executable_func turns the void * code pointer into a callable jit_function. */
11611 void *executable_func
;
11612 jit_function call_executable_func
;
11613 } convert_executable_func
;
11614 sljit_u8 local_space
[MACHINE_STACK_SIZE
];
11615 struct sljit_stack local_stack
;
/* start and min_start are the base of local_space; end and top are one past its last byte. */
11617 local_stack
.min_start
= local_space
;
11618 local_stack
.start
= local_space
;
11619 local_stack
.end
= local_space
+ MACHINE_STACK_SIZE
;
11620 local_stack
.top
= local_space
+ MACHINE_STACK_SIZE
;
11621 arguments
->stack
= &local_stack
;
11622 convert_executable_func
.executable_func
= executable_func
;
11623 return convert_executable_func
.call_executable_func(arguments
);
/* Runs the JIT-compiled code attached to extra_data on `subject`.
   The execution mode is chosen from the PCRE_PARTIAL_* bits of `options`;
   PCRE_ERROR_JIT_BADOPTION is returned when no code was compiled for that
   mode. `offsets` / `offset_count` describe the caller's output vector.
   extra_data->executable_jit is dereferenced without a NULL check here.
   NOTE(review): the retval declaration, some branch bodies and the final
   return are not visible in this listing. */
11627 PRIV(jit_exec
)(const PUBL(extra
) *extra_data
, const pcre_uchar
*subject
,
11628 int length
, int start_offset
, int options
, int *offsets
, int offset_count
)
11630 executable_functions
*functions
= (executable_functions
*)extra_data
->executable_jit
;
11632 void *executable_func
;
11633 jit_function call_executable_func
;
11634 } convert_executable_func
;
11635 jit_arguments arguments
;
11636 int max_offset_count
;
/* PCRE_PARTIAL_HARD takes precedence over PCRE_PARTIAL_SOFT. */
11638 int mode
= JIT_COMPILE
;
11640 if ((options
& PCRE_PARTIAL_HARD
) != 0)
11641 mode
= JIT_PARTIAL_HARD_COMPILE
;
11642 else if ((options
& PCRE_PARTIAL_SOFT
) != 0)
11643 mode
= JIT_PARTIAL_SOFT_COMPILE
;
11645 if (functions
->executable_funcs
[mode
] == NULL
)
11646 return PCRE_ERROR_JIT_BADOPTION
;
11648 /* Sanity checks should be handled by pcre_exec. */
11649 arguments
.str
= subject
+ start_offset
;
11650 arguments
.begin
= subject
;
11651 arguments
.end
= subject
+ length
;
11652 arguments
.mark_ptr
= NULL
;
11653 /* JIT decreases this value less frequently than the interpreter. */
/* Use the caller's match limit if provided, else MATCH_LIMIT; a non-zero
   limit stored with the compiled code lowers it further. */
11654 arguments
.limit_match
= ((extra_data
->flags
& PCRE_EXTRA_MATCH_LIMIT
) == 0) ? MATCH_LIMIT
: (sljit_u32
)(extra_data
->match_limit
);
11655 if (functions
->limit_match
!= 0 && functions
->limit_match
< arguments
.limit_match
)
11656 arguments
.limit_match
= functions
->limit_match
;
11657 arguments
.notbol
= (options
& PCRE_NOTBOL
) != 0;
11658 arguments
.noteol
= (options
& PCRE_NOTEOL
) != 0;
11659 arguments
.notempty
= (options
& PCRE_NOTEMPTY
) != 0;
11660 arguments
.notempty_atstart
= (options
& PCRE_NOTEMPTY_ATSTART
) != 0;
11661 arguments
.offsets
= offsets
;
11662 arguments
.callout_data
= (extra_data
->flags
& PCRE_EXTRA_CALLOUT_DATA
) != 0 ? extra_data
->callout_data
: NULL
;
11663 arguments
.real_offset_count
= offset_count
;
11665 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11666 the output vector for storing captured strings, with the remainder used as
11667 workspace. We don't need the workspace here. For compatibility, we limit the
11668 number of captured strings in the same way as pcre_exec(), so that the user
11669 gets the same result with and without JIT. */
/* An offset_count of exactly 2 is left unchanged; the result is then capped
   at the size recorded when the pattern was compiled. */
11671 if (offset_count
!= 2)
11672 offset_count
= ((offset_count
- (offset_count
% 3)) * 2) / 3;
11673 max_offset_count
= functions
->top_bracket
;
11674 if (offset_count
> max_offset_count
)
11675 offset_count
= max_offset_count
;
11676 arguments
.offset_count
= offset_count
;
/* The JIT stack comes from the user callback when one is set, otherwise from
   userdata directly. */
11678 if (functions
->callback
)
11679 arguments
.stack
= (struct sljit_stack
*)functions
->callback(functions
->userdata
);
11681 arguments
.stack
= (struct sljit_stack
*)functions
->userdata
;
/* With no stack supplied, run on the temporary stack set up by
   jit_machine_stack_exec(); otherwise call the compiled code directly. */
11683 if (arguments
.stack
== NULL
)
11684 retval
= jit_machine_stack_exec(&arguments
, functions
->executable_funcs
[mode
]);
11687 convert_executable_func
.executable_func
= functions
->executable_funcs
[mode
];
11688 retval
= convert_executable_func
.call_executable_func(&arguments
);
/* NOTE(review): the statement executed when the result needs more than
   offset_count slots is not visible in this listing. */
11691 if (retval
* 2 > offset_count
)
/* Hand the mark pointer back to the caller when PCRE_EXTRA_MARK is set. */
11693 if ((extra_data
->flags
& PCRE_EXTRA_MARK
) != 0)
11694 *(extra_data
->mark
) = arguments
.mark_ptr
;
11699 #if defined COMPILE_PCRE8
11700 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
11701 pcre_jit_exec(const pcre
*argument_re
, const pcre_extra
*extra_data
,
11702 PCRE_SPTR subject
, int length
, int start_offset
, int options
,
11703 int *offsets
, int offset_count
, pcre_jit_stack
*stack
)
11704 #elif defined COMPILE_PCRE16
11705 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
11706 pcre16_jit_exec(const pcre16
*argument_re
, const pcre16_extra
*extra_data
,
11707 PCRE_SPTR16 subject
, int length
, int start_offset
, int options
,
11708 int *offsets
, int offset_count
, pcre16_jit_stack
*stack
)
11709 #elif defined COMPILE_PCRE32
11710 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
11711 pcre32_jit_exec(const pcre32
*argument_re
, const pcre32_extra
*extra_data
,
11712 PCRE_SPTR32 subject
, int length
, int start_offset
, int options
,
11713 int *offsets
, int offset_count
, pcre32_jit_stack
*stack
)
11716 pcre_uchar
*subject_ptr
= (pcre_uchar
*)subject
;
11717 executable_functions
*functions
= (executable_functions
*)(extra_data
? extra_data
->executable_jit
: NULL
);
11719 void *executable_func
;
11720 jit_function call_executable_func
;
11721 } convert_executable_func
;
11722 jit_arguments arguments
;
11723 int max_offset_count
;
11725 int mode
= JIT_COMPILE
;
11727 SLJIT_UNUSED_ARG(argument_re
);
11729 /* Plausibility checks */
11730 if ((options
& ~PUBLIC_JIT_EXEC_OPTIONS
) != 0) return PCRE_ERROR_JIT_BADOPTION
;
11732 if ((options
& PCRE_PARTIAL_HARD
) != 0)
11733 mode
= JIT_PARTIAL_HARD_COMPILE
;
11734 else if ((options
& PCRE_PARTIAL_SOFT
) != 0)
11735 mode
= JIT_PARTIAL_SOFT_COMPILE
;
11737 if (functions
== NULL
|| functions
->executable_funcs
[mode
] == NULL
)
11739 #if defined COMPILE_PCRE8
11741 #elif defined COMPILE_PCRE16
11743 #elif defined COMPILE_PCRE32
11746 (argument_re
, extra_data
, subject
, length
, start_offset
, options
, offsets
, offset_count
);
11748 /* Sanity checks should be handled by pcre_exec. */
11749 arguments
.stack
= (struct sljit_stack
*)stack
;
11750 arguments
.str
= subject_ptr
+ start_offset
;
11751 arguments
.begin
= subject_ptr
;
11752 arguments
.end
= subject_ptr
+ length
;
11753 arguments
.mark_ptr
= NULL
;
11754 /* JIT decreases this value less frequently than the interpreter. */
11755 arguments
.limit_match
= ((extra_data
->flags
& PCRE_EXTRA_MATCH_LIMIT
) == 0) ? MATCH_LIMIT
: (sljit_u32
)(extra_data
->match_limit
);
11756 if (functions
->limit_match
!= 0 && functions
->limit_match
< arguments
.limit_match
)
11757 arguments
.limit_match
= functions
->limit_match
;
11758 arguments
.notbol
= (options
& PCRE_NOTBOL
) != 0;
11759 arguments
.noteol
= (options
& PCRE_NOTEOL
) != 0;
11760 arguments
.notempty
= (options
& PCRE_NOTEMPTY
) != 0;
11761 arguments
.notempty_atstart
= (options
& PCRE_NOTEMPTY_ATSTART
) != 0;
11762 arguments
.offsets
= offsets
;
11763 arguments
.callout_data
= (extra_data
->flags
& PCRE_EXTRA_CALLOUT_DATA
) != 0 ? extra_data
->callout_data
: NULL
;
11764 arguments
.real_offset_count
= offset_count
;
11766 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
11767 the output vector for storing captured strings, with the remainder used as
11768 workspace. We don't need the workspace here. For compatibility, we limit the
11769 number of captured strings in the same way as pcre_exec(), so that the user
11770 gets the same result with and without JIT. */
11772 if (offset_count
!= 2)
11773 offset_count
= ((offset_count
- (offset_count
% 3)) * 2) / 3;
11774 max_offset_count
= functions
->top_bracket
;
11775 if (offset_count
> max_offset_count
)
11776 offset_count
= max_offset_count
;
11777 arguments
.offset_count
= offset_count
;
11779 convert_executable_func
.executable_func
= functions
->executable_funcs
[mode
];
11780 retval
= convert_executable_func
.call_executable_func(&arguments
);
11782 if (retval
* 2 > offset_count
)
11784 if ((extra_data
->flags
& PCRE_EXTRA_MARK
) != 0)
11785 *(extra_data
->mark
) = arguments
.mark_ptr
;
11791 PRIV(jit_free
)(void *executable_funcs
)
11794 executable_functions
*functions
= (executable_functions
*)executable_funcs
;
11795 for (i
= 0; i
< JIT_NUMBER_OF_COMPILE_MODES
; i
++)
11797 if (functions
->executable_funcs
[i
] != NULL
)
11798 sljit_free_code(functions
->executable_funcs
[i
]);
11799 free_read_only_data(functions
->read_only_data_heads
[i
], NULL
);
11801 SLJIT_FREE(functions
, compiler
->allocator_data
);
11805 PRIV(jit_get_size
)(void *executable_funcs
)
11809 sljit_uw
*executable_sizes
= ((executable_functions
*)executable_funcs
)->executable_sizes
;
11810 for (i
= 0; i
< JIT_NUMBER_OF_COMPILE_MODES
; i
++)
11811 size
+= executable_sizes
[i
];
11816 PRIV(jit_get_target
)(void)
11818 return sljit_get_platform_name();
11821 #if defined COMPILE_PCRE8
11822 PCRE_EXP_DECL pcre_jit_stack
*
11823 pcre_jit_stack_alloc(int startsize
, int maxsize
)
11824 #elif defined COMPILE_PCRE16
11825 PCRE_EXP_DECL pcre16_jit_stack
*
11826 pcre16_jit_stack_alloc(int startsize
, int maxsize
)
11827 #elif defined COMPILE_PCRE32
11828 PCRE_EXP_DECL pcre32_jit_stack
*
11829 pcre32_jit_stack_alloc(int startsize
, int maxsize
)
11832 if (startsize
< 1 || maxsize
< 1)
11834 if (startsize
> maxsize
)
11835 startsize
= maxsize
;
11836 startsize
= (startsize
+ STACK_GROWTH_RATE
- 1) & ~(STACK_GROWTH_RATE
- 1);
11837 maxsize
= (maxsize
+ STACK_GROWTH_RATE
- 1) & ~(STACK_GROWTH_RATE
- 1);
11838 return (PUBL(jit_stack
)*)sljit_allocate_stack(startsize
, maxsize
, NULL
);
11841 #if defined COMPILE_PCRE8
11843 pcre_jit_stack_free(pcre_jit_stack
*stack
)
11844 #elif defined COMPILE_PCRE16
11846 pcre16_jit_stack_free(pcre16_jit_stack
*stack
)
11847 #elif defined COMPILE_PCRE32
11849 pcre32_jit_stack_free(pcre32_jit_stack
*stack
)
11852 sljit_free_stack((struct sljit_stack
*)stack
, NULL
);
11855 #if defined COMPILE_PCRE8
11857 pcre_assign_jit_stack(pcre_extra
*extra
, pcre_jit_callback callback
, void *userdata
)
11858 #elif defined COMPILE_PCRE16
11860 pcre16_assign_jit_stack(pcre16_extra
*extra
, pcre16_jit_callback callback
, void *userdata
)
11861 #elif defined COMPILE_PCRE32
11863 pcre32_assign_jit_stack(pcre32_extra
*extra
, pcre32_jit_callback callback
, void *userdata
)
11866 executable_functions
*functions
;
11867 if (extra
!= NULL
&&
11868 (extra
->flags
& PCRE_EXTRA_EXECUTABLE_JIT
) != 0 &&
11869 extra
->executable_jit
!= NULL
)
11871 functions
= (executable_functions
*)extra
->executable_jit
;
11872 functions
->callback
= callback
;
11873 functions
->userdata
= userdata
;
11877 #if defined COMPILE_PCRE8
11879 pcre_jit_free_unused_memory(void)
11880 #elif defined COMPILE_PCRE16
11882 pcre16_jit_free_unused_memory(void)
11883 #elif defined COMPILE_PCRE32
11885 pcre32_jit_free_unused_memory(void)
11888 sljit_free_unused_memory_exec();
11891 #else /* SUPPORT_JIT */
/* These are dummy functions to avoid linking errors when JIT support is not
being compiled. */
11896 static const void *const dummy_stack
= NULL
;
11898 #if defined COMPILE_PCRE8
11899 #define DUMMY_STACK (pcre_jit_stack *)&dummy_stack
11900 PCRE_EXP_DECL pcre_jit_stack
*
11901 pcre_jit_stack_alloc(int startsize
, int maxsize
)
11902 #elif defined COMPILE_PCRE16
11903 #define DUMMY_STACK (pcre16_jit_stack *)&dummy_stack
11904 PCRE_EXP_DECL pcre16_jit_stack
*
11905 pcre16_jit_stack_alloc(int startsize
, int maxsize
)
11906 #elif defined COMPILE_PCRE32
11907 #define DUMMY_STACK (pcre32_jit_stack *)&dummy_stack
11908 PCRE_EXP_DECL pcre32_jit_stack
*
11909 pcre32_jit_stack_alloc(int startsize
, int maxsize
)
11914 return DUMMY_STACK
;
11917 #if defined COMPILE_PCRE8
11919 pcre_jit_stack_free(pcre_jit_stack
*stack
)
11920 #elif defined COMPILE_PCRE16
11922 pcre16_jit_stack_free(pcre16_jit_stack
*stack
)
11923 #elif defined COMPILE_PCRE32
11925 pcre32_jit_stack_free(pcre32_jit_stack
*stack
)
11931 #if defined COMPILE_PCRE8
11933 pcre_assign_jit_stack(pcre_extra
*extra
, pcre_jit_callback callback
, void *userdata
)
11934 #elif defined COMPILE_PCRE16
11936 pcre16_assign_jit_stack(pcre16_extra
*extra
, pcre16_jit_callback callback
, void *userdata
)
11937 #elif defined COMPILE_PCRE32
11939 pcre32_assign_jit_stack(pcre32_extra
*extra
, pcre32_jit_callback callback
, void *userdata
)
11947 #if defined COMPILE_PCRE8
11949 pcre_jit_free_unused_memory(void)
11950 #elif defined COMPILE_PCRE16
11952 pcre16_jit_free_unused_memory(void)
11953 #elif defined COMPILE_PCRE32
11955 pcre32_jit_free_unused_memory(void)
11962 /* End of pcre_jit_compile.c */