root's authorized_keys should be perm 0600 (u=rw)
[tomato.git] / release / src / router / pcre / pcre_jit_compile.c
blobe67071ef791106dcb9dc8e87240cfbf4c3bbeb2a
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
47 #include "pcre_internal.h"
49 #if defined SUPPORT_JIT
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
62 #include "sljit/sljitLir.c"
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
68 /* Defines for debugging purposes. */
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
91 Short summary about the backtracking mechanism employed by the jit code generator:
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
119 A(B|C)D
121 The generated code will be the following:
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
147 Saved stack frames:
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
157 Thus we can restore the private data to a particular point in the stack.
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *addr;
204 struct label_addr_list *next;
205 } label_addr_list;
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
/* Backtrack record for bracket opcodes. 'common' has to stay the first
member (see the requirement stated on backtrack_common). */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
/* Node of a singly linked list (chained through 'next') describing one
recursion target. */
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls which are emitted before the function is created. */
291 jump_list *calls;
292 /* Points to the starting opcode. */
293 sljit_sw start;
294 } recurse_entry;
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
299 } recurse_backtrack;
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
309 sljit_sw start;
310 /* Exit point for the then opcodes of this alternative. */
311 jump_list *quit;
312 /* Frame size of the current alternative. */
313 int framesize;
314 } then_trap_backtrack;
316 #define MAX_RANGE_SIZE 4
/* State shared by the code generator functions of this file, which receive
it as their 'common' argument. */
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
322 pcre_uchar *start;
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* This read-only data is available during runtime. */
326 sljit_uw *read_only_data;
327 /* The total size of the read-only data. */
328 sljit_uw read_only_data_size;
329 /* The next free entry of the read_only_data. */
330 sljit_uw *read_only_data_ptr;
331 /* Tells whether the capturing bracket is optimized. */
332 pcre_uint8 *optimized_cbracket;
333 /* Tells whether the starting offset is a target of then. */
334 pcre_uint8 *then_offsets;
335 /* Current position where a THEN must jump. */
336 then_trap_backtrack *then_trap;
337 /* Starting offset of private data for capturing brackets. */
338 int cbra_ptr;
339 /* Output vector starting point. Must be divisible by 2. */
340 int ovector_start;
341 /* Last known position of the requested byte. */
342 int req_char_ptr;
343 /* Head of the last recursion. */
344 int recursive_head_ptr;
345 /* First inspected character for partial matching. */
346 int start_used_ptr;
347 /* Starting pointer for partial soft matches. */
348 int hit_start;
349 /* End pointer of the first line. */
350 int first_line_end;
351 /* Points to the marked string. */
352 int mark_ptr;
353 /* Recursive control verb management chain. */
354 int control_head_ptr;
355 /* Points to the last matched capture block index. */
356 int capture_last_ptr;
357 /* Points to the starting position of the current match. */
358 int start_ptr;
360 /* Flipped and lower case tables. */
361 const pcre_uint8 *fcc;
362 sljit_sw lcc;
363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364 int mode;
365 /* TRUE, when minlength is greater than 0. */
/* NOTE(review): the field name suggests the opposite condition
(minlength == 0) - confirm against the code that initialises it. */
366 BOOL might_be_empty;
367 /* \K is found in the pattern. */
368 BOOL has_set_som;
369 /* (*SKIP:arg) is found in the pattern. */
370 BOOL has_skip_arg;
371 /* (*THEN) is found in the pattern. */
372 BOOL has_then;
373 /* Needs to know the start position anytime. */
374 BOOL needs_start_ptr;
375 /* Currently in recurse or negative assert. */
376 BOOL local_exit;
377 /* Currently in a positive assert. */
378 BOOL positive_assert;
379 /* Newline control. */
380 int nltype;
381 pcre_uint32 nlmax;
382 pcre_uint32 nlmin;
383 int newline;
384 int bsr_nltype;
385 pcre_uint32 bsr_nlmax;
386 pcre_uint32 bsr_nlmin;
387 /* Dollar endonly. */
388 int endonly;
389 /* Tables. */
390 sljit_sw ctypes;
391 /* Named capturing brackets. */
392 pcre_uchar *name_table;
393 sljit_sw name_count;
394 sljit_sw name_entry_size;
396 /* Labels and jump lists. */
397 struct sljit_label *partialmatchlabel;
398 struct sljit_label *quit_label;
399 struct sljit_label *forced_quit_label;
400 struct sljit_label *accept_label;
401 struct sljit_label *ff_newline_shortcut;
402 stub_list *stubs;
403 label_addr_list *label_addrs;
404 recurse_entry *entries;
405 recurse_entry *currententry;
406 jump_list *partialmatch;
407 jump_list *quit;
408 jump_list *positive_assert_quit;
409 jump_list *forced_quit;
410 jump_list *accept;
411 jump_list *calllimit;
412 jump_list *stackalloc;
413 jump_list *revertframes;
414 jump_list *wordboundary;
415 jump_list *anynewline;
416 jump_list *hspace;
417 jump_list *vspace;
418 jump_list *casefulcmp;
419 jump_list *caselesscmp;
420 jump_list *reset_match;
421 BOOL jscript_compat;
422 #ifdef SUPPORT_UTF
423 BOOL utf;
424 #ifdef SUPPORT_UCP
425 BOOL use_ucp;
426 #endif
427 #ifdef COMPILE_PCRE8
428 jump_list *utfreadchar;
429 jump_list *utfreadchar16;
430 jump_list *utfreadtype8;
431 #endif
432 #endif /* SUPPORT_UTF */
433 #ifdef SUPPORT_UCP
434 jump_list *getucd;
435 #endif
436 } compiler_common;
438 /* For byte_sequence_compare. */
440 typedef struct compare_context {
441 int length;
442 int sourcereg;
443 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
444 int ucharptr;
445 union {
446 sljit_si asint;
447 sljit_uh asushort;
448 #if defined COMPILE_PCRE8
449 sljit_ub asbyte;
450 sljit_ub asuchars[4];
451 #elif defined COMPILE_PCRE16
452 sljit_uh asuchars[2];
453 #elif defined COMPILE_PCRE32
454 sljit_ui asuchars[1];
455 #endif
456 } c;
457 union {
458 sljit_si asint;
459 sljit_uh asushort;
460 #if defined COMPILE_PCRE8
461 sljit_ub asbyte;
462 sljit_ub asuchars[4];
463 #elif defined COMPILE_PCRE16
464 sljit_uh asuchars[2];
465 #elif defined COMPILE_PCRE32
466 sljit_ui asuchars[1];
467 #endif
468 } oc;
469 #endif
470 } compare_context;
472 /* Undefine sljit macros. */
473 #undef CMP
475 /* Used for accessing the elements of the stack. */
476 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
478 #define TMP1 SLJIT_SCRATCH_REG1
479 #define TMP2 SLJIT_SCRATCH_REG3
480 #define TMP3 SLJIT_TEMPORARY_EREG2
481 #define STR_PTR SLJIT_SAVED_REG1
482 #define STR_END SLJIT_SAVED_REG2
483 #define STACK_TOP SLJIT_SCRATCH_REG2
484 #define STACK_LIMIT SLJIT_SAVED_REG3
485 #define ARGUMENTS SLJIT_SAVED_EREG1
486 #define COUNT_MATCH SLJIT_SAVED_EREG2
487 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
489 /* Local space layout. */
490 /* These two locals can be used by the current opcode. */
491 #define LOCALS0 (0 * sizeof(sljit_sw))
492 #define LOCALS1 (1 * sizeof(sljit_sw))
493 /* Two local variables for possessive quantifiers (char1 cannot use them). */
494 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
495 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
496 /* Max limit of recursions. */
497 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
498 /* The output vector is stored on the stack, and contains pointers
499 to characters. The vector data is divided into two groups: the first
500 group contains the start / end character pointers, and the second is
501 the start pointers when the end of the capturing group has not yet reached. */
502 #define OVECTOR_START (common->ovector_start)
503 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
504 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
505 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
507 #if defined COMPILE_PCRE8
508 #define MOV_UCHAR SLJIT_MOV_UB
509 #define MOVU_UCHAR SLJIT_MOVU_UB
510 #elif defined COMPILE_PCRE16
511 #define MOV_UCHAR SLJIT_MOV_UH
512 #define MOVU_UCHAR SLJIT_MOVU_UH
513 #elif defined COMPILE_PCRE32
514 #define MOV_UCHAR SLJIT_MOV_UI
515 #define MOVU_UCHAR SLJIT_MOVU_UI
516 #else
517 #error Unsupported compiling mode
518 #endif
520 /* Shortcuts. */
521 #define DEFINE_COMPILER \
522 struct sljit_compiler *compiler = common->compiler
523 #define OP1(op, dst, dstw, src, srcw) \
524 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
525 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
526 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
527 #define LABEL() \
528 sljit_emit_label(compiler)
529 #define JUMP(type) \
530 sljit_emit_jump(compiler, (type))
531 #define JUMPTO(type, label) \
532 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
533 #define JUMPHERE(jump) \
534 sljit_set_label((jump), sljit_emit_label(compiler))
535 #define SET_LABEL(jump, label) \
536 sljit_set_label((jump), (label))
537 #define CMP(type, src1, src1w, src2, src2w) \
538 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
539 #define CMPTO(type, src1, src1w, src2, src2w, label) \
540 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
541 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
542 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
543 #define GET_LOCAL_BASE(dst, dstw, offset) \
544 sljit_get_local_base(compiler, (dst), (dstw), (offset))
546 #define READ_CHAR_MAX 0x7fffffff
548 static pcre_uchar* bracketend(pcre_uchar* cc)
550 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
551 do cc += GET(cc, 1); while (*cc == OP_ALT);
552 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
553 cc += 1 + LINK_SIZE;
554 return cc;
557 static int no_alternatives(pcre_uchar* cc)
559 int count = 0;
560 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
563 cc += GET(cc, 1);
564 count++;
566 while (*cc == OP_ALT);
567 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
568 return count;
/* Number of set bits of every 4 bit value, indexed by that value.
The table is a pure lookup table, so it is declared const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
576 /* Functions which might need modification for each newly supported opcode:
577 next_opcode
578 check_opcode_types
579 set_private_data_ptrs
580 get_framesize
581 init_frame
582 get_private_data_copy_length
583 copy_private_data
584 compile_matchingpath
585 compile_backtrackingpath
588 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
590 SLJIT_UNUSED_ARG(common);
591 switch(*cc)
593 case OP_SOD:
594 case OP_SOM:
595 case OP_SET_SOM:
596 case OP_NOT_WORD_BOUNDARY:
597 case OP_WORD_BOUNDARY:
598 case OP_NOT_DIGIT:
599 case OP_DIGIT:
600 case OP_NOT_WHITESPACE:
601 case OP_WHITESPACE:
602 case OP_NOT_WORDCHAR:
603 case OP_WORDCHAR:
604 case OP_ANY:
605 case OP_ALLANY:
606 case OP_NOTPROP:
607 case OP_PROP:
608 case OP_ANYNL:
609 case OP_NOT_HSPACE:
610 case OP_HSPACE:
611 case OP_NOT_VSPACE:
612 case OP_VSPACE:
613 case OP_EXTUNI:
614 case OP_EODN:
615 case OP_EOD:
616 case OP_CIRC:
617 case OP_CIRCM:
618 case OP_DOLL:
619 case OP_DOLLM:
620 case OP_CRSTAR:
621 case OP_CRMINSTAR:
622 case OP_CRPLUS:
623 case OP_CRMINPLUS:
624 case OP_CRQUERY:
625 case OP_CRMINQUERY:
626 case OP_CRRANGE:
627 case OP_CRMINRANGE:
628 case OP_CRPOSSTAR:
629 case OP_CRPOSPLUS:
630 case OP_CRPOSQUERY:
631 case OP_CRPOSRANGE:
632 case OP_CLASS:
633 case OP_NCLASS:
634 case OP_REF:
635 case OP_REFI:
636 case OP_DNREF:
637 case OP_DNREFI:
638 case OP_RECURSE:
639 case OP_CALLOUT:
640 case OP_ALT:
641 case OP_KET:
642 case OP_KETRMAX:
643 case OP_KETRMIN:
644 case OP_KETRPOS:
645 case OP_REVERSE:
646 case OP_ASSERT:
647 case OP_ASSERT_NOT:
648 case OP_ASSERTBACK:
649 case OP_ASSERTBACK_NOT:
650 case OP_ONCE:
651 case OP_ONCE_NC:
652 case OP_BRA:
653 case OP_BRAPOS:
654 case OP_CBRA:
655 case OP_CBRAPOS:
656 case OP_COND:
657 case OP_SBRA:
658 case OP_SBRAPOS:
659 case OP_SCBRA:
660 case OP_SCBRAPOS:
661 case OP_SCOND:
662 case OP_CREF:
663 case OP_DNCREF:
664 case OP_RREF:
665 case OP_DNRREF:
666 case OP_DEF:
667 case OP_BRAZERO:
668 case OP_BRAMINZERO:
669 case OP_BRAPOSZERO:
670 case OP_PRUNE:
671 case OP_SKIP:
672 case OP_THEN:
673 case OP_COMMIT:
674 case OP_FAIL:
675 case OP_ACCEPT:
676 case OP_ASSERT_ACCEPT:
677 case OP_CLOSE:
678 case OP_SKIPZERO:
679 return cc + PRIV(OP_lengths)[*cc];
681 case OP_CHAR:
682 case OP_CHARI:
683 case OP_NOT:
684 case OP_NOTI:
685 case OP_STAR:
686 case OP_MINSTAR:
687 case OP_PLUS:
688 case OP_MINPLUS:
689 case OP_QUERY:
690 case OP_MINQUERY:
691 case OP_UPTO:
692 case OP_MINUPTO:
693 case OP_EXACT:
694 case OP_POSSTAR:
695 case OP_POSPLUS:
696 case OP_POSQUERY:
697 case OP_POSUPTO:
698 case OP_STARI:
699 case OP_MINSTARI:
700 case OP_PLUSI:
701 case OP_MINPLUSI:
702 case OP_QUERYI:
703 case OP_MINQUERYI:
704 case OP_UPTOI:
705 case OP_MINUPTOI:
706 case OP_EXACTI:
707 case OP_POSSTARI:
708 case OP_POSPLUSI:
709 case OP_POSQUERYI:
710 case OP_POSUPTOI:
711 case OP_NOTSTAR:
712 case OP_NOTMINSTAR:
713 case OP_NOTPLUS:
714 case OP_NOTMINPLUS:
715 case OP_NOTQUERY:
716 case OP_NOTMINQUERY:
717 case OP_NOTUPTO:
718 case OP_NOTMINUPTO:
719 case OP_NOTEXACT:
720 case OP_NOTPOSSTAR:
721 case OP_NOTPOSPLUS:
722 case OP_NOTPOSQUERY:
723 case OP_NOTPOSUPTO:
724 case OP_NOTSTARI:
725 case OP_NOTMINSTARI:
726 case OP_NOTPLUSI:
727 case OP_NOTMINPLUSI:
728 case OP_NOTQUERYI:
729 case OP_NOTMINQUERYI:
730 case OP_NOTUPTOI:
731 case OP_NOTMINUPTOI:
732 case OP_NOTEXACTI:
733 case OP_NOTPOSSTARI:
734 case OP_NOTPOSPLUSI:
735 case OP_NOTPOSQUERYI:
736 case OP_NOTPOSUPTOI:
737 cc += PRIV(OP_lengths)[*cc];
738 #ifdef SUPPORT_UTF
739 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
740 #endif
741 return cc;
743 /* Special cases. */
744 case OP_TYPESTAR:
745 case OP_TYPEMINSTAR:
746 case OP_TYPEPLUS:
747 case OP_TYPEMINPLUS:
748 case OP_TYPEQUERY:
749 case OP_TYPEMINQUERY:
750 case OP_TYPEUPTO:
751 case OP_TYPEMINUPTO:
752 case OP_TYPEEXACT:
753 case OP_TYPEPOSSTAR:
754 case OP_TYPEPOSPLUS:
755 case OP_TYPEPOSQUERY:
756 case OP_TYPEPOSUPTO:
757 return cc + PRIV(OP_lengths)[*cc] - 1;
759 case OP_ANYBYTE:
760 #ifdef SUPPORT_UTF
761 if (common->utf) return NULL;
762 #endif
763 return cc + 1;
765 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
766 case OP_XCLASS:
767 return cc + GET(cc, 1);
768 #endif
770 case OP_MARK:
771 case OP_PRUNE_ARG:
772 case OP_SKIP_ARG:
773 case OP_THEN_ARG:
774 return cc + 1 + 2 + cc[1];
776 default:
777 /* All opcodes are supported now! */
778 SLJIT_ASSERT_STOP();
779 return NULL;
783 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
785 int count;
786 pcre_uchar *slot;
788 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
789 while (cc < ccend)
791 switch(*cc)
793 case OP_SET_SOM:
794 common->has_set_som = TRUE;
795 common->might_be_empty = TRUE;
796 cc += 1;
797 break;
799 case OP_REF:
800 case OP_REFI:
801 common->optimized_cbracket[GET2(cc, 1)] = 0;
802 cc += 1 + IMM2_SIZE;
803 break;
805 case OP_BRA:
806 case OP_CBRA:
807 case OP_SBRA:
808 case OP_SCBRA:
809 count = no_alternatives(cc);
810 if (count > 4)
811 common->read_only_data_size += count * sizeof(sljit_uw);
812 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
813 break;
815 case OP_CBRAPOS:
816 case OP_SCBRAPOS:
817 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818 cc += 1 + LINK_SIZE + IMM2_SIZE;
819 break;
821 case OP_COND:
822 case OP_SCOND:
823 /* Only AUTO_CALLOUT can insert this opcode. We do
824 not intend to support this case. */
825 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826 return FALSE;
827 cc += 1 + LINK_SIZE;
828 break;
830 case OP_CREF:
831 common->optimized_cbracket[GET2(cc, 1)] = 0;
832 cc += 1 + IMM2_SIZE;
833 break;
835 case OP_DNREF:
836 case OP_DNREFI:
837 case OP_DNCREF:
838 count = GET2(cc, 1 + IMM2_SIZE);
839 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840 while (count-- > 0)
842 common->optimized_cbracket[GET2(slot, 0)] = 0;
843 slot += common->name_entry_size;
845 cc += 1 + 2 * IMM2_SIZE;
846 break;
848 case OP_RECURSE:
849 /* Set its value only once. */
850 if (common->recursive_head_ptr == 0)
852 common->recursive_head_ptr = common->ovector_start;
853 common->ovector_start += sizeof(sljit_sw);
855 cc += 1 + LINK_SIZE;
856 break;
858 case OP_CALLOUT:
859 if (common->capture_last_ptr == 0)
861 common->capture_last_ptr = common->ovector_start;
862 common->ovector_start += sizeof(sljit_sw);
864 cc += 2 + 2 * LINK_SIZE;
865 break;
867 case OP_THEN_ARG:
868 common->has_then = TRUE;
869 common->control_head_ptr = 1;
870 /* Fall through. */
872 case OP_PRUNE_ARG:
873 common->needs_start_ptr = TRUE;
874 /* Fall through. */
876 case OP_MARK:
877 if (common->mark_ptr == 0)
879 common->mark_ptr = common->ovector_start;
880 common->ovector_start += sizeof(sljit_sw);
882 cc += 1 + 2 + cc[1];
883 break;
885 case OP_THEN:
886 common->has_then = TRUE;
887 common->control_head_ptr = 1;
888 /* Fall through. */
890 case OP_PRUNE:
891 case OP_SKIP:
892 common->needs_start_ptr = TRUE;
893 cc += 1;
894 break;
896 case OP_SKIP_ARG:
897 common->control_head_ptr = 1;
898 common->has_skip_arg = TRUE;
899 cc += 1 + 2 + cc[1];
900 break;
902 default:
903 cc = next_opcode(common, cc);
904 if (cc == NULL)
905 return FALSE;
906 break;
909 return TRUE;
912 static int get_class_iterator_size(pcre_uchar *cc)
914 switch(*cc)
916 case OP_CRSTAR:
917 case OP_CRPLUS:
918 return 2;
920 case OP_CRMINSTAR:
921 case OP_CRMINPLUS:
922 case OP_CRQUERY:
923 case OP_CRMINQUERY:
924 return 1;
926 case OP_CRRANGE:
927 case OP_CRMINRANGE:
928 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
929 return 0;
930 return 2;
932 default:
933 return 0;
/* Checks whether the bracket starting at 'begin' is followed by identical
copies of itself (compared with memcmp), optionally continued by copies
introduced by OP_BRAZERO / OP_BRAMINZERO. When such a sequence is found,
three consecutive entries of common->private_data_ptrs are filled in,
starting at the index (position - common->start - LINK_SIZE): a distance,
an opcode (OP_EXACT, OP_UPTO or OP_MINUPTO) and a repeat count.

Returns TRUE when a repeat has been recorded (now or by an earlier call),
FALSE otherwise. Only brackets closed by a plain OP_KET are considered. */
937 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
939 pcre_uchar *end = bracketend(begin);
940 pcre_uchar *next;
941 pcre_uchar *next_end;
942 pcre_uchar *max_end;
943 pcre_uchar type;
944 sljit_sw length = end - begin;
945 int min, max, i;
947 /* Detect fixed iterations first. */
948 if (end[-(1 + LINK_SIZE)] != OP_KET)
949 return FALSE;
951 /* Already detected repeat. */
952 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
953 return TRUE;
/* Count the copies which directly follow the bracket; 'min' includes
the original one. */
955 next = end;
956 min = 1;
957 while (1)
959 if (*next != *begin)
960 break;
961 next_end = bracketend(next);
962 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
963 break;
964 next = next_end;
965 min++;
/* Exactly two copies are never converted. */
968 if (min == 2)
969 return FALSE;
/* Count the optional copies; each of them is expected in the form
type, OP_BRA, copy of the bracket. */
971 max = 0;
972 max_end = next;
973 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
975 type = *next;
976 while (1)
978 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
979 break;
980 next_end = bracketend(next + 2 + LINK_SIZE);
981 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
982 break;
983 next = next_end;
984 max++;
/* The last optional copy has no OP_BRA wrapper; it must be followed by
'max' OP_KET opcodes. */
987 if (next[0] == type && next[1] == *begin && max >= 1)
989 next_end = bracketend(next + 1);
990 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
992 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
993 if (*next_end != OP_KET)
994 break;
996 if (i == max)
998 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
999 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1000 /* +2 the original and the last. */
1001 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1002 if (min == 1)
1003 return TRUE;
1004 min--;
1005 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
/* Record the fixed part of the repeat when at least three copies remain. */
1011 if (min >= 3)
1013 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1014 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1015 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1016 return TRUE;
1019 return FALSE;
/* Groups of case labels for the single character iterator opcodes; each
macro expands to a run of "case OP_...:" labels and is meant to be placed
inside a switch on the opcode. In set_private_data_ptrs the _1 groups
request one private data word and the _2A / _2B groups request two (the
TYPE variants of 2A / 2B request none when the repeated item is OP_ANYNL
or OP_EXTUNI). The 2B groups are the UPTO forms, whose opcode is IMM2_SIZE
units longer. */
1022 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1023 case OP_MINSTAR: \
1024 case OP_MINPLUS: \
1025 case OP_QUERY: \
1026 case OP_MINQUERY: \
1027 case OP_MINSTARI: \
1028 case OP_MINPLUSI: \
1029 case OP_QUERYI: \
1030 case OP_MINQUERYI: \
1031 case OP_NOTMINSTAR: \
1032 case OP_NOTMINPLUS: \
1033 case OP_NOTQUERY: \
1034 case OP_NOTMINQUERY: \
1035 case OP_NOTMINSTARI: \
1036 case OP_NOTMINPLUSI: \
1037 case OP_NOTQUERYI: \
1038 case OP_NOTMINQUERYI:
1040 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1041 case OP_STAR: \
1042 case OP_PLUS: \
1043 case OP_STARI: \
1044 case OP_PLUSI: \
1045 case OP_NOTSTAR: \
1046 case OP_NOTPLUS: \
1047 case OP_NOTSTARI: \
1048 case OP_NOTPLUSI:
1050 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1051 case OP_UPTO: \
1052 case OP_MINUPTO: \
1053 case OP_UPTOI: \
1054 case OP_MINUPTOI: \
1055 case OP_NOTUPTO: \
1056 case OP_NOTMINUPTO: \
1057 case OP_NOTUPTOI: \
1058 case OP_NOTMINUPTOI:
1060 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1061 case OP_TYPEMINSTAR: \
1062 case OP_TYPEMINPLUS: \
1063 case OP_TYPEQUERY: \
1064 case OP_TYPEMINQUERY:
1066 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1067 case OP_TYPESTAR: \
1068 case OP_TYPEPLUS:
1070 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1071 case OP_TYPEUPTO: \
1072 case OP_TYPEMINUPTO:
1074 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1076 pcre_uchar *cc = common->start;
1077 pcre_uchar *alternative;
1078 pcre_uchar *end = NULL;
1079 int private_data_ptr = *private_data_start;
1080 int space, size, bracketlen;
1082 while (cc < ccend)
1084 space = 0;
1085 size = 0;
1086 bracketlen = 0;
1087 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1088 return;
1090 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1091 if (detect_repeat(common, cc))
1093 /* These brackets are converted to repeats, so no global
1094 based single character repeat is allowed. */
1095 if (cc >= end)
1096 end = bracketend(cc);
1099 switch(*cc)
1101 case OP_KET:
1102 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1104 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1105 private_data_ptr += sizeof(sljit_sw);
1106 cc += common->private_data_ptrs[cc + 1 - common->start];
1108 cc += 1 + LINK_SIZE;
1109 break;
1111 case OP_ASSERT:
1112 case OP_ASSERT_NOT:
1113 case OP_ASSERTBACK:
1114 case OP_ASSERTBACK_NOT:
1115 case OP_ONCE:
1116 case OP_ONCE_NC:
1117 case OP_BRAPOS:
1118 case OP_SBRA:
1119 case OP_SBRAPOS:
1120 case OP_SCOND:
1121 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1122 private_data_ptr += sizeof(sljit_sw);
1123 bracketlen = 1 + LINK_SIZE;
1124 break;
1126 case OP_CBRAPOS:
1127 case OP_SCBRAPOS:
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1130 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1131 break;
1133 case OP_COND:
1134 /* Might be a hidden SCOND. */
1135 alternative = cc + GET(cc, 1);
1136 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1138 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1139 private_data_ptr += sizeof(sljit_sw);
1141 bracketlen = 1 + LINK_SIZE;
1142 break;
1144 case OP_BRA:
1145 bracketlen = 1 + LINK_SIZE;
1146 break;
1148 case OP_CBRA:
1149 case OP_SCBRA:
1150 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1151 break;
1153 CASE_ITERATOR_PRIVATE_DATA_1
1154 space = 1;
1155 size = -2;
1156 break;
1158 CASE_ITERATOR_PRIVATE_DATA_2A
1159 space = 2;
1160 size = -2;
1161 break;
1163 CASE_ITERATOR_PRIVATE_DATA_2B
1164 space = 2;
1165 size = -(2 + IMM2_SIZE);
1166 break;
1168 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1169 space = 1;
1170 size = 1;
1171 break;
1173 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1174 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1175 space = 2;
1176 size = 1;
1177 break;
1179 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1180 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1181 space = 2;
1182 size = 1 + IMM2_SIZE;
1183 break;
1185 case OP_CLASS:
1186 case OP_NCLASS:
1187 size += 1 + 32 / sizeof(pcre_uchar);
1188 space = get_class_iterator_size(cc + size);
1189 break;
1191 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1192 case OP_XCLASS:
1193 size = GET(cc, 1);
1194 space = get_class_iterator_size(cc + size);
1195 break;
1196 #endif
1198 default:
1199 cc = next_opcode(common, cc);
1200 SLJIT_ASSERT(cc != NULL);
1201 break;
1204 /* Character iterators, which are not inside a repeated bracket,
1205 gets a private slot instead of allocating it on the stack. */
1206 if (space > 0 && cc >= end)
1208 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1209 private_data_ptr += sizeof(sljit_sw) * space;
1212 if (size != 0)
1214 if (size < 0)
1216 cc += -size;
1217 #ifdef SUPPORT_UTF
1218 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1219 #endif
1221 else
1222 cc += size;
1225 if (bracketlen > 0)
1227 if (cc >= end)
1229 end = bracketend(cc);
1230 if (end[-1 - LINK_SIZE] == OP_KET)
1231 end = NULL;
1233 cc += bracketlen;
1236 *private_data_start = private_data_ptr;
1239 /* Returns with a frame_types (always < 0) if no need for frame. */
1240 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1242 int length = 0;
1243 int possessive = 0;
1244 BOOL stack_restore = FALSE;
1245 BOOL setsom_found = recursive;
1246 BOOL setmark_found = recursive;
1247 /* The last capture is a local variable even for recursions. */
1248 BOOL capture_last_found = FALSE;
1250 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1251 SLJIT_ASSERT(common->control_head_ptr != 0);
1252 *needs_control_head = TRUE;
1253 #else
1254 *needs_control_head = FALSE;
1255 #endif
1257 if (ccend == NULL)
1259 ccend = bracketend(cc) - (1 + LINK_SIZE);
1260 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1262 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1263 /* This is correct regardless of common->capture_last_ptr. */
1264 capture_last_found = TRUE;
1266 cc = next_opcode(common, cc);
1269 SLJIT_ASSERT(cc != NULL);
1270 while (cc < ccend)
1271 switch(*cc)
1273 case OP_SET_SOM:
1274 SLJIT_ASSERT(common->has_set_som);
1275 stack_restore = TRUE;
1276 if (!setsom_found)
1278 length += 2;
1279 setsom_found = TRUE;
1281 cc += 1;
1282 break;
1284 case OP_MARK:
1285 case OP_PRUNE_ARG:
1286 case OP_THEN_ARG:
1287 SLJIT_ASSERT(common->mark_ptr != 0);
1288 stack_restore = TRUE;
1289 if (!setmark_found)
1291 length += 2;
1292 setmark_found = TRUE;
1294 if (common->control_head_ptr != 0)
1295 *needs_control_head = TRUE;
1296 cc += 1 + 2 + cc[1];
1297 break;
1299 case OP_RECURSE:
1300 stack_restore = TRUE;
1301 if (common->has_set_som && !setsom_found)
1303 length += 2;
1304 setsom_found = TRUE;
1306 if (common->mark_ptr != 0 && !setmark_found)
1308 length += 2;
1309 setmark_found = TRUE;
1311 if (common->capture_last_ptr != 0 && !capture_last_found)
1313 length += 2;
1314 capture_last_found = TRUE;
1316 cc += 1 + LINK_SIZE;
1317 break;
1319 case OP_CBRA:
1320 case OP_CBRAPOS:
1321 case OP_SCBRA:
1322 case OP_SCBRAPOS:
1323 stack_restore = TRUE;
1324 if (common->capture_last_ptr != 0 && !capture_last_found)
1326 length += 2;
1327 capture_last_found = TRUE;
1329 length += 3;
1330 cc += 1 + LINK_SIZE + IMM2_SIZE;
1331 break;
1333 default:
1334 stack_restore = TRUE;
1335 /* Fall through. */
1337 case OP_NOT_WORD_BOUNDARY:
1338 case OP_WORD_BOUNDARY:
1339 case OP_NOT_DIGIT:
1340 case OP_DIGIT:
1341 case OP_NOT_WHITESPACE:
1342 case OP_WHITESPACE:
1343 case OP_NOT_WORDCHAR:
1344 case OP_WORDCHAR:
1345 case OP_ANY:
1346 case OP_ALLANY:
1347 case OP_ANYBYTE:
1348 case OP_NOTPROP:
1349 case OP_PROP:
1350 case OP_ANYNL:
1351 case OP_NOT_HSPACE:
1352 case OP_HSPACE:
1353 case OP_NOT_VSPACE:
1354 case OP_VSPACE:
1355 case OP_EXTUNI:
1356 case OP_EODN:
1357 case OP_EOD:
1358 case OP_CIRC:
1359 case OP_CIRCM:
1360 case OP_DOLL:
1361 case OP_DOLLM:
1362 case OP_CHAR:
1363 case OP_CHARI:
1364 case OP_NOT:
1365 case OP_NOTI:
1367 case OP_EXACT:
1368 case OP_POSSTAR:
1369 case OP_POSPLUS:
1370 case OP_POSQUERY:
1371 case OP_POSUPTO:
1373 case OP_EXACTI:
1374 case OP_POSSTARI:
1375 case OP_POSPLUSI:
1376 case OP_POSQUERYI:
1377 case OP_POSUPTOI:
1379 case OP_NOTEXACT:
1380 case OP_NOTPOSSTAR:
1381 case OP_NOTPOSPLUS:
1382 case OP_NOTPOSQUERY:
1383 case OP_NOTPOSUPTO:
1385 case OP_NOTEXACTI:
1386 case OP_NOTPOSSTARI:
1387 case OP_NOTPOSPLUSI:
1388 case OP_NOTPOSQUERYI:
1389 case OP_NOTPOSUPTOI:
1391 case OP_TYPEEXACT:
1392 case OP_TYPEPOSSTAR:
1393 case OP_TYPEPOSPLUS:
1394 case OP_TYPEPOSQUERY:
1395 case OP_TYPEPOSUPTO:
1397 case OP_CLASS:
1398 case OP_NCLASS:
1399 case OP_XCLASS:
1401 cc = next_opcode(common, cc);
1402 SLJIT_ASSERT(cc != NULL);
1403 break;
1406 /* Possessive quantifiers can use a special case. */
1407 if (SLJIT_UNLIKELY(possessive == length))
1408 return stack_restore ? no_frame : no_stack;
1410 if (length > 0)
1411 return length + 1;
1412 return stack_restore ? no_frame : no_stack;
1415 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1417 DEFINE_COMPILER;
1418 BOOL setsom_found = recursive;
1419 BOOL setmark_found = recursive;
1420 /* The last capture is a local variable even for recursions. */
1421 BOOL capture_last_found = FALSE;
1422 int offset;
1424 /* >= 1 + shortest item size (2) */
1425 SLJIT_UNUSED_ARG(stacktop);
1426 SLJIT_ASSERT(stackpos >= stacktop + 2);
1428 stackpos = STACK(stackpos);
1429 if (ccend == NULL)
1431 ccend = bracketend(cc) - (1 + LINK_SIZE);
1432 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1433 cc = next_opcode(common, cc);
1436 SLJIT_ASSERT(cc != NULL);
1437 while (cc < ccend)
1438 switch(*cc)
1440 case OP_SET_SOM:
1441 SLJIT_ASSERT(common->has_set_som);
1442 if (!setsom_found)
1444 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1446 stackpos += (int)sizeof(sljit_sw);
1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1448 stackpos += (int)sizeof(sljit_sw);
1449 setsom_found = TRUE;
1451 cc += 1;
1452 break;
1454 case OP_MARK:
1455 case OP_PRUNE_ARG:
1456 case OP_THEN_ARG:
1457 SLJIT_ASSERT(common->mark_ptr != 0);
1458 if (!setmark_found)
1460 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1462 stackpos += (int)sizeof(sljit_sw);
1463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1464 stackpos += (int)sizeof(sljit_sw);
1465 setmark_found = TRUE;
1467 cc += 1 + 2 + cc[1];
1468 break;
1470 case OP_RECURSE:
1471 if (common->has_set_som && !setsom_found)
1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1475 stackpos += (int)sizeof(sljit_sw);
1476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1477 stackpos += (int)sizeof(sljit_sw);
1478 setsom_found = TRUE;
1480 if (common->mark_ptr != 0 && !setmark_found)
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setmark_found = TRUE;
1489 if (common->capture_last_ptr != 0 && !capture_last_found)
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 capture_last_found = TRUE;
1498 cc += 1 + LINK_SIZE;
1499 break;
1501 case OP_CBRA:
1502 case OP_CBRAPOS:
1503 case OP_SCBRA:
1504 case OP_SCBRAPOS:
1505 if (common->capture_last_ptr != 0 && !capture_last_found)
1507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1509 stackpos += (int)sizeof(sljit_sw);
1510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1511 stackpos += (int)sizeof(sljit_sw);
1512 capture_last_found = TRUE;
1514 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1516 stackpos += (int)sizeof(sljit_sw);
1517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1518 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1522 stackpos += (int)sizeof(sljit_sw);
1524 cc += 1 + LINK_SIZE + IMM2_SIZE;
1525 break;
1527 default:
1528 cc = next_opcode(common, cc);
1529 SLJIT_ASSERT(cc != NULL);
1530 break;
1533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1534 SLJIT_ASSERT(stackpos == STACK(stacktop));
1537 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1539 int private_data_length = needs_control_head ? 3 : 2;
1540 int size;
1541 pcre_uchar *alternative;
1542 /* Calculate the sum of the private machine words. */
1543 while (cc < ccend)
1545 size = 0;
1546 switch(*cc)
1548 case OP_KET:
1549 if (PRIVATE_DATA(cc) != 0)
1550 private_data_length++;
1551 cc += 1 + LINK_SIZE;
1552 break;
1554 case OP_ASSERT:
1555 case OP_ASSERT_NOT:
1556 case OP_ASSERTBACK:
1557 case OP_ASSERTBACK_NOT:
1558 case OP_ONCE:
1559 case OP_ONCE_NC:
1560 case OP_BRAPOS:
1561 case OP_SBRA:
1562 case OP_SBRAPOS:
1563 case OP_SCOND:
1564 private_data_length++;
1565 cc += 1 + LINK_SIZE;
1566 break;
1568 case OP_CBRA:
1569 case OP_SCBRA:
1570 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1571 private_data_length++;
1572 cc += 1 + LINK_SIZE + IMM2_SIZE;
1573 break;
1575 case OP_CBRAPOS:
1576 case OP_SCBRAPOS:
1577 private_data_length += 2;
1578 cc += 1 + LINK_SIZE + IMM2_SIZE;
1579 break;
1581 case OP_COND:
1582 /* Might be a hidden SCOND. */
1583 alternative = cc + GET(cc, 1);
1584 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE;
1587 break;
1589 CASE_ITERATOR_PRIVATE_DATA_1
1590 if (PRIVATE_DATA(cc))
1591 private_data_length++;
1592 cc += 2;
1593 #ifdef SUPPORT_UTF
1594 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1595 #endif
1596 break;
1598 CASE_ITERATOR_PRIVATE_DATA_2A
1599 if (PRIVATE_DATA(cc))
1600 private_data_length += 2;
1601 cc += 2;
1602 #ifdef SUPPORT_UTF
1603 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1604 #endif
1605 break;
1607 CASE_ITERATOR_PRIVATE_DATA_2B
1608 if (PRIVATE_DATA(cc))
1609 private_data_length += 2;
1610 cc += 2 + IMM2_SIZE;
1611 #ifdef SUPPORT_UTF
1612 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1613 #endif
1614 break;
1616 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1617 if (PRIVATE_DATA(cc))
1618 private_data_length++;
1619 cc += 1;
1620 break;
1622 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1623 if (PRIVATE_DATA(cc))
1624 private_data_length += 2;
1625 cc += 1;
1626 break;
1628 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1629 if (PRIVATE_DATA(cc))
1630 private_data_length += 2;
1631 cc += 1 + IMM2_SIZE;
1632 break;
1634 case OP_CLASS:
1635 case OP_NCLASS:
1636 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1637 case OP_XCLASS:
1638 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1639 #else
1640 size = 1 + 32 / (int)sizeof(pcre_uchar);
1641 #endif
1642 if (PRIVATE_DATA(cc))
1643 private_data_length += get_class_iterator_size(cc + size);
1644 cc += size;
1645 break;
1647 default:
1648 cc = next_opcode(common, cc);
1649 SLJIT_ASSERT(cc != NULL);
1650 break;
1653 SLJIT_ASSERT(cc == ccend);
1654 return private_data_length;
1657 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1658 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1660 DEFINE_COMPILER;
1661 int srcw[2];
1662 int count, size;
1663 BOOL tmp1next = TRUE;
1664 BOOL tmp1empty = TRUE;
1665 BOOL tmp2empty = TRUE;
1666 pcre_uchar *alternative;
1667 enum {
1668 start,
1669 loop,
1671 } status;
1673 status = save ? start : loop;
1674 stackptr = STACK(stackptr - 2);
1675 stacktop = STACK(stacktop - 1);
1677 if (!save)
1679 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1680 if (stackptr < stacktop)
1682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1683 stackptr += sizeof(sljit_sw);
1684 tmp1empty = FALSE;
1686 if (stackptr < stacktop)
1688 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1689 stackptr += sizeof(sljit_sw);
1690 tmp2empty = FALSE;
1692 /* The tmp1next must be TRUE in either way. */
1697 count = 0;
1698 switch(status)
1700 case start:
1701 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1702 count = 1;
1703 srcw[0] = common->recursive_head_ptr;
1704 if (needs_control_head)
1706 SLJIT_ASSERT(common->control_head_ptr != 0);
1707 count = 2;
1708 srcw[1] = common->control_head_ptr;
1710 status = loop;
1711 break;
1713 case loop:
1714 if (cc >= ccend)
1716 status = end;
1717 break;
1720 switch(*cc)
1722 case OP_KET:
1723 if (PRIVATE_DATA(cc) != 0)
1725 count = 1;
1726 srcw[0] = PRIVATE_DATA(cc);
1728 cc += 1 + LINK_SIZE;
1729 break;
1731 case OP_ASSERT:
1732 case OP_ASSERT_NOT:
1733 case OP_ASSERTBACK:
1734 case OP_ASSERTBACK_NOT:
1735 case OP_ONCE:
1736 case OP_ONCE_NC:
1737 case OP_BRAPOS:
1738 case OP_SBRA:
1739 case OP_SBRAPOS:
1740 case OP_SCOND:
1741 count = 1;
1742 srcw[0] = PRIVATE_DATA(cc);
1743 SLJIT_ASSERT(srcw[0] != 0);
1744 cc += 1 + LINK_SIZE;
1745 break;
1747 case OP_CBRA:
1748 case OP_SCBRA:
1749 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1751 count = 1;
1752 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1754 cc += 1 + LINK_SIZE + IMM2_SIZE;
1755 break;
1757 case OP_CBRAPOS:
1758 case OP_SCBRAPOS:
1759 count = 2;
1760 srcw[0] = PRIVATE_DATA(cc);
1761 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1762 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1763 cc += 1 + LINK_SIZE + IMM2_SIZE;
1764 break;
1766 case OP_COND:
1767 /* Might be a hidden SCOND. */
1768 alternative = cc + GET(cc, 1);
1769 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1771 count = 1;
1772 srcw[0] = PRIVATE_DATA(cc);
1773 SLJIT_ASSERT(srcw[0] != 0);
1775 cc += 1 + LINK_SIZE;
1776 break;
1778 CASE_ITERATOR_PRIVATE_DATA_1
1779 if (PRIVATE_DATA(cc))
1781 count = 1;
1782 srcw[0] = PRIVATE_DATA(cc);
1784 cc += 2;
1785 #ifdef SUPPORT_UTF
1786 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1787 #endif
1788 break;
1790 CASE_ITERATOR_PRIVATE_DATA_2A
1791 if (PRIVATE_DATA(cc))
1793 count = 2;
1794 srcw[0] = PRIVATE_DATA(cc);
1795 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1797 cc += 2;
1798 #ifdef SUPPORT_UTF
1799 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1800 #endif
1801 break;
1803 CASE_ITERATOR_PRIVATE_DATA_2B
1804 if (PRIVATE_DATA(cc))
1806 count = 2;
1807 srcw[0] = PRIVATE_DATA(cc);
1808 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1810 cc += 2 + IMM2_SIZE;
1811 #ifdef SUPPORT_UTF
1812 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1813 #endif
1814 break;
1816 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1817 if (PRIVATE_DATA(cc))
1819 count = 1;
1820 srcw[0] = PRIVATE_DATA(cc);
1822 cc += 1;
1823 break;
1825 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1826 if (PRIVATE_DATA(cc))
1828 count = 2;
1829 srcw[0] = PRIVATE_DATA(cc);
1830 srcw[1] = srcw[0] + sizeof(sljit_sw);
1832 cc += 1;
1833 break;
1835 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1836 if (PRIVATE_DATA(cc))
1838 count = 2;
1839 srcw[0] = PRIVATE_DATA(cc);
1840 srcw[1] = srcw[0] + sizeof(sljit_sw);
1842 cc += 1 + IMM2_SIZE;
1843 break;
1845 case OP_CLASS:
1846 case OP_NCLASS:
1847 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1848 case OP_XCLASS:
1849 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1850 #else
1851 size = 1 + 32 / (int)sizeof(pcre_uchar);
1852 #endif
1853 if (PRIVATE_DATA(cc))
1854 switch(get_class_iterator_size(cc + size))
1856 case 1:
1857 count = 1;
1858 srcw[0] = PRIVATE_DATA(cc);
1859 break;
1861 case 2:
1862 count = 2;
1863 srcw[0] = PRIVATE_DATA(cc);
1864 srcw[1] = srcw[0] + sizeof(sljit_sw);
1865 break;
1867 default:
1868 SLJIT_ASSERT_STOP();
1869 break;
1871 cc += size;
1872 break;
1874 default:
1875 cc = next_opcode(common, cc);
1876 SLJIT_ASSERT(cc != NULL);
1877 break;
1879 break;
1881 case end:
1882 SLJIT_ASSERT_STOP();
1883 break;
1886 while (count > 0)
1888 count--;
1889 if (save)
1891 if (tmp1next)
1893 if (!tmp1empty)
1895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1896 stackptr += sizeof(sljit_sw);
1898 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1899 tmp1empty = FALSE;
1900 tmp1next = FALSE;
1902 else
1904 if (!tmp2empty)
1906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1907 stackptr += sizeof(sljit_sw);
1909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1910 tmp2empty = FALSE;
1911 tmp1next = TRUE;
1914 else
1916 if (tmp1next)
1918 SLJIT_ASSERT(!tmp1empty);
1919 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1920 tmp1empty = stackptr >= stacktop;
1921 if (!tmp1empty)
1923 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1924 stackptr += sizeof(sljit_sw);
1926 tmp1next = FALSE;
1928 else
1930 SLJIT_ASSERT(!tmp2empty);
1931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1932 tmp2empty = stackptr >= stacktop;
1933 if (!tmp2empty)
1935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1936 stackptr += sizeof(sljit_sw);
1938 tmp1next = TRUE;
1943 while (status != end);
1945 if (save)
1947 if (tmp1next)
1949 if (!tmp1empty)
1951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1952 stackptr += sizeof(sljit_sw);
1954 if (!tmp2empty)
1956 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1957 stackptr += sizeof(sljit_sw);
1960 else
1962 if (!tmp2empty)
1964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1965 stackptr += sizeof(sljit_sw);
1967 if (!tmp1empty)
1969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1970 stackptr += sizeof(sljit_sw);
1974 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1977 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1979 pcre_uchar *end = bracketend(cc);
1980 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1982 /* Assert captures then. */
1983 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1984 current_offset = NULL;
1985 /* Conditional block does not. */
1986 if (*cc == OP_COND || *cc == OP_SCOND)
1987 has_alternatives = FALSE;
1989 cc = next_opcode(common, cc);
1990 if (has_alternatives)
1991 current_offset = common->then_offsets + (cc - common->start);
1993 while (cc < end)
1995 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1996 cc = set_then_offsets(common, cc, current_offset);
1997 else
1999 if (*cc == OP_ALT && has_alternatives)
2000 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2001 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2002 *current_offset = 1;
2003 cc = next_opcode(common, cc);
2007 return end;
/* The case list helper macros are not used after this point. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2017 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2019 return (value & (value - 1)) == 0;
2022 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2024 while (list)
2026 /* sljit_set_label is clever enough to do nothing
2027 if either the jump or the label is NULL. */
2028 SET_LABEL(list->jump, label);
2029 list = list->next;
2033 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
2035 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2036 if (list_item)
2038 list_item->next = *list;
2039 list_item->jump = jump;
2040 *list = list_item;
2044 static void add_stub(compiler_common *common, struct sljit_jump *start)
2046 DEFINE_COMPILER;
2047 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2049 if (list_item)
2051 list_item->start = start;
2052 list_item->quit = LABEL();
2053 list_item->next = common->stubs;
2054 common->stubs = list_item;
2058 static void flush_stubs(compiler_common *common)
2060 DEFINE_COMPILER;
2061 stub_list* list_item = common->stubs;
2063 while (list_item)
2065 JUMPHERE(list_item->start);
2066 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2067 JUMPTO(SLJIT_JUMP, list_item->quit);
2068 list_item = list_item->next;
2070 common->stubs = NULL;
2073 static void add_label_addr(compiler_common *common)
2075 DEFINE_COMPILER;
2076 label_addr_list *label_addr;
2078 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2079 if (label_addr == NULL)
2080 return;
2081 label_addr->label = LABEL();
2082 label_addr->addr = common->read_only_data_ptr;
2083 label_addr->next = common->label_addrs;
2084 common->label_addrs = label_addr;
2085 common->read_only_data_ptr++;
2088 static SLJIT_INLINE void count_match(compiler_common *common)
2090 DEFINE_COMPILER;
2092 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2093 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2096 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2098 /* May destroy all locals and registers except TMP2. */
2099 DEFINE_COMPILER;
2101 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2102 #ifdef DESTROY_REGISTERS
2103 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2104 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2105 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2108 #endif
2109 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2112 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2114 DEFINE_COMPILER;
2115 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2118 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2120 DEFINE_COMPILER;
2121 struct sljit_label *loop;
2122 int i;
2124 /* At this point we can freely use all temporary registers. */
2125 SLJIT_ASSERT(length > 1);
2126 /* TMP1 returns with begin - 1. */
2127 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2128 if (length < 8)
2130 for (i = 1; i < length; i++)
2131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2133 else
2135 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2136 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2137 loop = LABEL();
2138 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2140 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2144 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2146 DEFINE_COMPILER;
2147 struct sljit_label *loop;
2148 int i;
2150 SLJIT_ASSERT(length > 1);
2151 /* OVECTOR(1) contains the "string begin - 1" constant. */
2152 if (length > 2)
2153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2154 if (length < 8)
2156 for (i = 2; i < length; i++)
2157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2159 else
2161 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2162 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2163 loop = LABEL();
2164 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2165 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2166 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2169 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2170 if (common->mark_ptr != 0)
2171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2172 if (common->control_head_ptr != 0)
2173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2174 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2176 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2179 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2181 while (current != NULL)
2183 switch (current[-2])
2185 case type_then_trap:
2186 break;
2188 case type_mark:
2189 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2190 return current[-4];
2191 break;
2193 default:
2194 SLJIT_ASSERT_STOP();
2195 break;
2197 current = (sljit_sw*)current[-1];
2199 return -1;
2202 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2204 DEFINE_COMPILER;
2205 struct sljit_label *loop;
2206 struct sljit_jump *early_quit;
2208 /* At this point we can freely use all registers. */
2209 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2212 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2213 if (common->mark_ptr != 0)
2214 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2215 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2216 if (common->mark_ptr != 0)
2217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2218 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2219 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2220 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2221 /* Unlikely, but possible */
2222 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2223 loop = LABEL();
2224 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2225 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2226 /* Copy the integer value to the output buffer */
2227 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2228 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2229 #endif
2230 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2232 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2233 JUMPHERE(early_quit);
2235 /* Calculate the return value, which is the maximum ovector value. */
2236 if (topbracket > 1)
2238 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2239 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2241 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2242 loop = LABEL();
2243 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2244 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2245 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2246 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2248 else
2249 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2252 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2254 DEFINE_COMPILER;
2255 struct sljit_jump *jump;
2257 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2258 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2259 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2261 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2262 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2263 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2264 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2266 /* Store match begin and end. */
2267 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2268 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2270 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2271 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2272 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2273 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2274 #endif
2275 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2276 JUMPHERE(jump);
2278 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2279 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2280 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2281 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2282 #endif
2283 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2285 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2286 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2287 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2288 #endif
2289 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2291 JUMPTO(SLJIT_JUMP, quit);
2294 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2296 /* May destroy TMP1. */
2297 DEFINE_COMPILER;
2298 struct sljit_jump *jump;
2300 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2302 /* The value of -1 must be kept for start_used_ptr! */
2303 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2304 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2305 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2306 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2307 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2308 JUMPHERE(jump);
2310 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2312 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2313 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2314 JUMPHERE(jump);
2318 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2320 /* Detects if the character has an othercase. */
2321 unsigned int c;
2323 #ifdef SUPPORT_UTF
2324 if (common->utf)
2326 GETCHAR(c, cc);
2327 if (c > 127)
2329 #ifdef SUPPORT_UCP
2330 return c != UCD_OTHERCASE(c);
2331 #else
2332 return FALSE;
2333 #endif
2335 #ifndef COMPILE_PCRE8
2336 return common->fcc[c] != c;
2337 #endif
2339 else
2340 #endif
2341 c = *cc;
2342 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2345 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2347 /* Returns with the othercase. */
2348 #ifdef SUPPORT_UTF
2349 if (common->utf && c > 127)
2351 #ifdef SUPPORT_UCP
2352 return UCD_OTHERCASE(c);
2353 #else
2354 return c;
2355 #endif
2357 #endif
2358 return TABLE_GET(c, common->fcc, c);
2361 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2363 /* Detects if the character and its othercase has only 1 bit difference. */
2364 unsigned int c, oc, bit;
2365 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2366 int n;
2367 #endif
2369 #ifdef SUPPORT_UTF
2370 if (common->utf)
2372 GETCHAR(c, cc);
2373 if (c <= 127)
2374 oc = common->fcc[c];
2375 else
2377 #ifdef SUPPORT_UCP
2378 oc = UCD_OTHERCASE(c);
2379 #else
2380 oc = c;
2381 #endif
2384 else
2386 c = *cc;
2387 oc = TABLE_GET(c, common->fcc, c);
2389 #else
2390 c = *cc;
2391 oc = TABLE_GET(c, common->fcc, c);
2392 #endif
2394 SLJIT_ASSERT(c != oc);
2396 bit = c ^ oc;
2397 /* Optimized for English alphabet. */
2398 if (c <= 127 && bit == 0x20)
2399 return (0 << 8) | 0x20;
2401 /* Since c != oc, they must have at least 1 bit difference. */
2402 if (!is_powerof2(bit))
2403 return 0;
2405 #if defined COMPILE_PCRE8
2407 #ifdef SUPPORT_UTF
2408 if (common->utf && c > 127)
2410 n = GET_EXTRALEN(*cc);
2411 while ((bit & 0x3f) == 0)
2413 n--;
2414 bit >>= 6;
2416 return (n << 8) | bit;
2418 #endif /* SUPPORT_UTF */
2419 return (0 << 8) | bit;
2421 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2423 #ifdef SUPPORT_UTF
2424 if (common->utf && c > 65535)
2426 if (bit >= (1 << 10))
2427 bit >>= 10;
2428 else
2429 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2431 #endif /* SUPPORT_UTF */
2432 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2434 #endif /* COMPILE_PCRE[8|16|32] */
2437 static void check_partial(compiler_common *common, BOOL force)
2439 /* Checks whether a partial matching is occurred. Does not modify registers. */
2440 DEFINE_COMPILER;
2441 struct sljit_jump *jump = NULL;
2443 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2445 if (common->mode == JIT_COMPILE)
2446 return;
2448 if (!force)
2449 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2450 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2451 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2453 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2455 else
2457 if (common->partialmatchlabel != NULL)
2458 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2459 else
2460 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2463 if (jump != NULL)
2464 JUMPHERE(jump);
2467 static void check_str_end(compiler_common *common, jump_list **end_reached)
2469 /* Does not affect registers. Usually used in a tight spot. */
2470 DEFINE_COMPILER;
2471 struct sljit_jump *jump;
2473 if (common->mode == JIT_COMPILE)
2475 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2476 return;
2479 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2480 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2482 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2484 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2486 else
2488 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2489 if (common->partialmatchlabel != NULL)
2490 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2491 else
2492 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2494 JUMPHERE(jump);
2497 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2499 DEFINE_COMPILER;
2500 struct sljit_jump *jump;
2502 if (common->mode == JIT_COMPILE)
2504 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2505 return;
2508 /* Partial matching mode. */
2509 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2510 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2511 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2513 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2514 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2516 else
2518 if (common->partialmatchlabel != NULL)
2519 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2520 else
2521 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2523 JUMPHERE(jump);
2526 static void peek_char(compiler_common *common, pcre_uint32 max)
2528 /* Reads the character into TMP1, keeps STR_PTR.
2529 Does not check STR_END. TMP2 Destroyed. */
2530 DEFINE_COMPILER;
2531 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2532 struct sljit_jump *jump;
2533 #endif
2535 SLJIT_UNUSED_ARG(max);
2537 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2538 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2539 if (common->utf)
2541 if (max < 128) return;
2543 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2544 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2545 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2546 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2547 JUMPHERE(jump);
2549 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2551 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2552 if (common->utf)
2554 if (max < 0xd800) return;
2556 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2557 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2558 /* TMP2 contains the high surrogate. */
2559 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2560 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2561 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2562 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2563 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2564 JUMPHERE(jump);
2566 #endif
2569 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2571 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2573 /* Tells whether the character codes below 128 are enough
2574 to determine a match. */
2575 const pcre_uint8 value = nclass ? 0xff : 0;
2576 const pcre_uint8* end = bitset + 32;
2578 bitset += 16;
2581 if (*bitset++ != value)
2582 return FALSE;
2584 while (bitset < end);
2585 return TRUE;
2588 static void read_char7_type(compiler_common *common, BOOL full_read)
2590 /* Reads the precise character type of a character into TMP1, if the character
2591 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2592 full_read argument tells whether characters above max are accepted or not. */
2593 DEFINE_COMPILER;
2594 struct sljit_jump *jump;
2596 SLJIT_ASSERT(common->utf);
2598 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2599 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2601 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2603 if (full_read)
2605 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2606 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2607 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2608 JUMPHERE(jump);
2612 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2614 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2616 /* Reads the precise value of a character into TMP1, if the character is
2617 between min and max (c >= min && c <= max). Otherwise it returns with a value
2618 outside the range. Does not check STR_END. */
2619 DEFINE_COMPILER;
2620 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2621 struct sljit_jump *jump;
2622 #endif
2623 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2624 struct sljit_jump *jump2;
2625 #endif
2627 SLJIT_UNUSED_ARG(update_str_ptr);
2628 SLJIT_UNUSED_ARG(min);
2629 SLJIT_UNUSED_ARG(max);
2630 SLJIT_ASSERT(min <= max);
2632 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2635 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2636 if (common->utf)
2638 if (max < 128 && !update_str_ptr) return;
2640 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2641 if (min >= 0x10000)
2643 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2644 if (update_str_ptr)
2645 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2646 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2647 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2648 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2649 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2650 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2651 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2652 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2653 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2654 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2655 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2656 if (!update_str_ptr)
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2658 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2659 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2660 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661 JUMPHERE(jump2);
2662 if (update_str_ptr)
2663 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2665 else if (min >= 0x800 && max <= 0xffff)
2667 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2668 if (update_str_ptr)
2669 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2670 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2671 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2672 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2673 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2674 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2675 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2676 if (!update_str_ptr)
2677 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2678 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2679 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681 JUMPHERE(jump2);
2682 if (update_str_ptr)
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2685 else if (max >= 0x800)
2686 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2687 else if (max < 128)
2689 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2690 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2692 else
2694 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2695 if (!update_str_ptr)
2696 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697 else
2698 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2701 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2702 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2703 if (update_str_ptr)
2704 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2706 JUMPHERE(jump);
2708 #endif
2710 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2711 if (common->utf)
2713 if (max >= 0x10000)
2715 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2716 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2717 /* TMP2 contains the high surrogate. */
2718 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2719 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2720 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2722 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2723 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2724 JUMPHERE(jump);
2725 return;
2728 if (max < 0xd800 && !update_str_ptr) return;
2730 /* Skip low surrogate if necessary. */
2731 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2732 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2733 if (update_str_ptr)
2734 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735 if (max >= 0xd800)
2736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2737 JUMPHERE(jump);
2739 #endif
2742 static SLJIT_INLINE void read_char(compiler_common *common)
2744 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2747 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2749 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2750 DEFINE_COMPILER;
2751 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2752 struct sljit_jump *jump;
2753 #endif
2754 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2755 struct sljit_jump *jump2;
2756 #endif
2758 SLJIT_UNUSED_ARG(update_str_ptr);
2760 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2761 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2763 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2764 if (common->utf)
2766 /* This can be an extra read in some situations, but hopefully
2767 it is needed in most cases. */
2768 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2769 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2770 if (!update_str_ptr)
2772 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2773 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2774 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2775 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2776 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2777 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2779 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2780 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2781 JUMPHERE(jump2);
2783 else
2784 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2785 JUMPHERE(jump);
2786 return;
2788 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2790 #if !defined COMPILE_PCRE8
2791 /* The ctypes array contains only 256 values. */
2792 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2793 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2794 #endif
2795 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2796 #if !defined COMPILE_PCRE8
2797 JUMPHERE(jump);
2798 #endif
2800 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2801 if (common->utf && update_str_ptr)
2803 /* Skip low surrogate if necessary. */
2804 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2805 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2806 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2807 JUMPHERE(jump);
2809 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2812 static void skip_char_back(compiler_common *common)
2814 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2815 DEFINE_COMPILER;
2816 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2817 #if defined COMPILE_PCRE8
2818 struct sljit_label *label;
2820 if (common->utf)
2822 label = LABEL();
2823 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2824 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2825 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2826 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2827 return;
2829 #elif defined COMPILE_PCRE16
2830 if (common->utf)
2832 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2833 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2834 /* Skip low surrogate if necessary. */
2835 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2837 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2838 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2839 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2840 return;
2842 #endif /* COMPILE_PCRE[8|16] */
2843 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2844 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2847 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2849 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2850 DEFINE_COMPILER;
2851 struct sljit_jump *jump;
2853 if (nltype == NLTYPE_ANY)
2855 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2856 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2858 else if (nltype == NLTYPE_ANYCRLF)
2860 if (jumpifmatch)
2862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2863 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2865 else
2867 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2868 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2869 JUMPHERE(jump);
2872 else
2874 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2875 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2879 #ifdef SUPPORT_UTF
2881 #if defined COMPILE_PCRE8
2882 static void do_utfreadchar(compiler_common *common)
2884 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2885 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2886 DEFINE_COMPILER;
2887 struct sljit_jump *jump;
2889 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2890 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2891 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2892 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2893 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2894 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2896 /* Searching for the first zero. */
2897 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2898 jump = JUMP(SLJIT_C_NOT_ZERO);
2899 /* Two byte sequence. */
2900 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2901 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2902 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2904 JUMPHERE(jump);
2905 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2906 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2907 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2908 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2909 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2911 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2912 jump = JUMP(SLJIT_C_NOT_ZERO);
2913 /* Three byte sequence. */
2914 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2915 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2916 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2918 /* Four byte sequence. */
2919 JUMPHERE(jump);
2920 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2921 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2922 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2923 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2924 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2925 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2927 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2930 static void do_utfreadchar16(compiler_common *common)
2932 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2933 of the character (>= 0xc0). Return value in TMP1. */
2934 DEFINE_COMPILER;
2935 struct sljit_jump *jump;
2937 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2938 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2939 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2940 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2941 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2942 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2944 /* Searching for the first zero. */
2945 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2946 jump = JUMP(SLJIT_C_NOT_ZERO);
2947 /* Two byte sequence. */
2948 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2949 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2951 JUMPHERE(jump);
2952 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2953 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2954 /* This code runs only in 8 bit mode. No need to shift the value. */
2955 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2956 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2957 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2958 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2959 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2960 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2961 /* Three byte sequence. */
2962 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2963 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2966 static void do_utfreadtype8(compiler_common *common)
2968 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2969 of the character (>= 0xc0). Return value in TMP1. */
2970 DEFINE_COMPILER;
2971 struct sljit_jump *jump;
2972 struct sljit_jump *compare;
2974 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2976 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2977 jump = JUMP(SLJIT_C_NOT_ZERO);
2978 /* Two byte sequence. */
2979 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2980 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2981 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2982 /* The upper 5 bits are known at this point. */
2983 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2984 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2985 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2986 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2987 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2988 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2990 JUMPHERE(compare);
2991 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2992 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2994 /* We only have types for characters less than 256. */
2995 JUMPHERE(jump);
2996 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2997 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3002 #endif /* COMPILE_PCRE8 */
3004 #endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP

/* UCD_BLOCK_SIZE must be 128, i.e. 1 << UCD_BLOCK_SHIFT (see the assert in
do_getucd). */
#define UCD_BLOCK_SHIFT 7
#define UCD_BLOCK_MASK ((1 << UCD_BLOCK_SHIFT) - 1)

static void do_getucd(compiler_common *common)
{
/* Routine which looks up the UCD record of the character passed in TMP1.
Returns the chartype in TMP1 and the UCD offset (record index) in TMP2. */
DEFINE_COMPILER;

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: the block number of the character selects a stage 2 block. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: index = block * block size + offset inside the block. The
entries are read as 16 bit values, hence the index shift of 1. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Records are 8 bytes long (asserted above), hence the index shift of 3. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
#endif
3034 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3036 DEFINE_COMPILER;
3037 struct sljit_label *mainloop;
3038 struct sljit_label *newlinelabel = NULL;
3039 struct sljit_jump *start;
3040 struct sljit_jump *end = NULL;
3041 struct sljit_jump *nl = NULL;
3042 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3043 struct sljit_jump *singlechar;
3044 #endif
3045 jump_list *newline = NULL;
3046 BOOL newlinecheck = FALSE;
3047 BOOL readuchar = FALSE;
3049 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3050 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3051 newlinecheck = TRUE;
3053 if (firstline)
3055 /* Search for the end of the first line. */
3056 SLJIT_ASSERT(common->first_line_end != 0);
3057 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3059 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3061 mainloop = LABEL();
3062 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3063 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3064 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3065 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3067 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3068 JUMPHERE(end);
3069 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3071 else
3073 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3074 mainloop = LABEL();
3075 /* Continual stores does not cause data dependency. */
3076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3077 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3078 check_newlinechar(common, common->nltype, &newline, TRUE);
3079 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3080 JUMPHERE(end);
3081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3082 set_jumps(newline, LABEL());
3085 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3088 start = JUMP(SLJIT_JUMP);
3090 if (newlinecheck)
3092 newlinelabel = LABEL();
3093 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3094 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3095 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3096 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3097 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3098 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3099 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3100 #endif
3101 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3102 nl = JUMP(SLJIT_JUMP);
3105 mainloop = LABEL();
3107 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3108 #ifdef SUPPORT_UTF
3109 if (common->utf) readuchar = TRUE;
3110 #endif
3111 if (newlinecheck) readuchar = TRUE;
3113 if (readuchar)
3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3116 if (newlinecheck)
3117 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3119 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3120 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3121 #if defined COMPILE_PCRE8
3122 if (common->utf)
3124 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3125 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3126 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3127 JUMPHERE(singlechar);
3129 #elif defined COMPILE_PCRE16
3130 if (common->utf)
3132 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3133 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3134 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3135 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3136 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3137 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3138 JUMPHERE(singlechar);
3140 #endif /* COMPILE_PCRE[8|16] */
3141 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3142 JUMPHERE(start);
3144 if (newlinecheck)
3146 JUMPHERE(end);
3147 JUMPHERE(nl);
3150 return mainloop;
3153 #define MAX_N_CHARS 16
3154 #define MAX_N_BYTES 8
3156 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3158 pcre_uint8 len = bytes[0];
3159 int i;
3161 if (len == 255)
3162 return;
3164 if (len == 0)
3166 bytes[0] = 1;
3167 bytes[1] = byte;
3168 return;
3171 for (i = len; i > 0; i--)
3172 if (bytes[i] == byte)
3173 return;
3175 if (len >= MAX_N_BYTES - 1)
3177 bytes[0] = 255;
3178 return;
3181 len++;
3182 bytes[len] = byte;
3183 bytes[0] = len;
3186 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3188 /* Recursive function, which scans prefix literals. */
3189 BOOL last, any, caseless;
3190 int len, repeat, len_save, consumed = 0;
3191 pcre_uint32 chr, mask;
3192 pcre_uchar *alternative, *cc_save, *oc;
3193 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3194 pcre_uchar othercase[8];
3195 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3196 pcre_uchar othercase[2];
3197 #else
3198 pcre_uchar othercase[1];
3199 #endif
3201 repeat = 1;
3202 while (TRUE)
3204 last = TRUE;
3205 any = FALSE;
3206 caseless = FALSE;
3207 switch (*cc)
3209 case OP_CHARI:
3210 caseless = TRUE;
3211 case OP_CHAR:
3212 last = FALSE;
3213 cc++;
3214 break;
3216 case OP_SOD:
3217 case OP_SOM:
3218 case OP_SET_SOM:
3219 case OP_NOT_WORD_BOUNDARY:
3220 case OP_WORD_BOUNDARY:
3221 case OP_EODN:
3222 case OP_EOD:
3223 case OP_CIRC:
3224 case OP_CIRCM:
3225 case OP_DOLL:
3226 case OP_DOLLM:
3227 /* Zero width assertions. */
3228 cc++;
3229 continue;
3231 case OP_ASSERT:
3232 case OP_ASSERT_NOT:
3233 case OP_ASSERTBACK:
3234 case OP_ASSERTBACK_NOT:
3235 cc = bracketend(cc);
3236 continue;
3238 case OP_PLUSI:
3239 case OP_MINPLUSI:
3240 case OP_POSPLUSI:
3241 caseless = TRUE;
3242 case OP_PLUS:
3243 case OP_MINPLUS:
3244 case OP_POSPLUS:
3245 cc++;
3246 break;
3248 case OP_EXACTI:
3249 caseless = TRUE;
3250 case OP_EXACT:
3251 repeat = GET2(cc, 1);
3252 last = FALSE;
3253 cc += 1 + IMM2_SIZE;
3254 break;
3256 case OP_QUERYI:
3257 case OP_MINQUERYI:
3258 case OP_POSQUERYI:
3259 caseless = TRUE;
3260 case OP_QUERY:
3261 case OP_MINQUERY:
3262 case OP_POSQUERY:
3263 len = 1;
3264 cc++;
3265 #ifdef SUPPORT_UTF
3266 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3267 #endif
3268 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3269 if (max_chars == 0)
3270 return consumed;
3271 last = FALSE;
3272 break;
3274 case OP_KET:
3275 cc += 1 + LINK_SIZE;
3276 continue;
3278 case OP_ALT:
3279 cc += GET(cc, 1);
3280 continue;
3282 case OP_ONCE:
3283 case OP_ONCE_NC:
3284 case OP_BRA:
3285 case OP_BRAPOS:
3286 case OP_CBRA:
3287 case OP_CBRAPOS:
3288 alternative = cc + GET(cc, 1);
3289 while (*alternative == OP_ALT)
3291 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3292 if (max_chars == 0)
3293 return consumed;
3294 alternative += GET(alternative, 1);
3297 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3298 cc += IMM2_SIZE;
3299 cc += 1 + LINK_SIZE;
3300 continue;
3302 case OP_CLASS:
3303 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3304 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3305 #endif
3306 any = TRUE;
3307 cc += 1 + 32 / sizeof(pcre_uchar);
3308 break;
3310 case OP_NCLASS:
3311 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3312 if (common->utf) return consumed;
3313 #endif
3314 any = TRUE;
3315 cc += 1 + 32 / sizeof(pcre_uchar);
3316 break;
3318 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3319 case OP_XCLASS:
3320 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3321 if (common->utf) return consumed;
3322 #endif
3323 any = TRUE;
3324 cc += GET(cc, 1);
3325 break;
3326 #endif
3328 case OP_DIGIT:
3329 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3330 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3331 return consumed;
3332 #endif
3333 any = TRUE;
3334 cc++;
3335 break;
3337 case OP_WHITESPACE:
3338 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3339 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3340 return consumed;
3341 #endif
3342 any = TRUE;
3343 cc++;
3344 break;
3346 case OP_WORDCHAR:
3347 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3348 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3349 return consumed;
3350 #endif
3351 any = TRUE;
3352 cc++;
3353 break;
3355 case OP_NOT:
3356 case OP_NOTI:
3357 cc++;
3358 /* Fall through. */
3359 case OP_NOT_DIGIT:
3360 case OP_NOT_WHITESPACE:
3361 case OP_NOT_WORDCHAR:
3362 case OP_ANY:
3363 case OP_ALLANY:
3364 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3365 if (common->utf) return consumed;
3366 #endif
3367 any = TRUE;
3368 cc++;
3369 break;
3371 #ifdef SUPPORT_UCP
3372 case OP_NOTPROP:
3373 case OP_PROP:
3374 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3375 if (common->utf) return consumed;
3376 #endif
3377 any = TRUE;
3378 cc += 1 + 2;
3379 break;
3380 #endif
3382 case OP_TYPEEXACT:
3383 repeat = GET2(cc, 1);
3384 cc += 1 + IMM2_SIZE;
3385 continue;
3387 case OP_NOTEXACT:
3388 case OP_NOTEXACTI:
3389 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3390 if (common->utf) return consumed;
3391 #endif
3392 any = TRUE;
3393 repeat = GET2(cc, 1);
3394 cc += 1 + IMM2_SIZE + 1;
3395 break;
3397 default:
3398 return consumed;
3401 if (any)
3403 #if defined COMPILE_PCRE8
3404 mask = 0xff;
3405 #elif defined COMPILE_PCRE16
3406 mask = 0xffff;
3407 #elif defined COMPILE_PCRE32
3408 mask = 0xffffffff;
3409 #else
3410 SLJIT_ASSERT_STOP();
3411 #endif
3415 chars[0] = mask;
3416 chars[1] = mask;
3417 bytes[0] = 255;
3419 consumed++;
3420 if (--max_chars == 0)
3421 return consumed;
3422 chars += 2;
3423 bytes += MAX_N_BYTES;
3425 while (--repeat > 0);
3427 repeat = 1;
3428 continue;
3431 len = 1;
3432 #ifdef SUPPORT_UTF
3433 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3434 #endif
3436 if (caseless && char_has_othercase(common, cc))
3438 #ifdef SUPPORT_UTF
3439 if (common->utf)
3441 GETCHAR(chr, cc);
3442 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3443 return consumed;
3445 else
3446 #endif
3448 chr = *cc;
3449 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3452 else
3453 caseless = FALSE;
3455 len_save = len;
3456 cc_save = cc;
3457 while (TRUE)
3459 oc = othercase;
3462 chr = *cc;
3463 #ifdef COMPILE_PCRE32
3464 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3465 return consumed;
3466 #endif
3467 add_prefix_byte((pcre_uint8)chr, bytes);
3469 mask = 0;
3470 if (caseless)
3472 add_prefix_byte((pcre_uint8)*oc, bytes);
3473 mask = *cc ^ *oc;
3474 chr |= mask;
3477 #ifdef COMPILE_PCRE32
3478 if (chars[0] == NOTACHAR && chars[1] == 0)
3479 #else
3480 if (chars[0] == NOTACHAR)
3481 #endif
3483 chars[0] = chr;
3484 chars[1] = mask;
3486 else
3488 mask |= chars[0] ^ chr;
3489 chr |= mask;
3490 chars[0] = chr;
3491 chars[1] |= mask;
3494 len--;
3495 consumed++;
3496 if (--max_chars == 0)
3497 return consumed;
3498 chars += 2;
3499 bytes += MAX_N_BYTES;
3500 cc++;
3501 oc++;
3503 while (len > 0);
3505 if (--repeat == 0)
3506 break;
3508 len = len_save;
3509 cc = cc_save;
3512 repeat = 1;
3513 if (last)
3514 return consumed;
3518 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3520 DEFINE_COMPILER;
3521 struct sljit_label *start;
3522 struct sljit_jump *quit;
3523 pcre_uint32 chars[MAX_N_CHARS * 2];
3524 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3525 pcre_uint8 ones[MAX_N_CHARS];
3526 int offsets[3];
3527 pcre_uint32 mask;
3528 pcre_uint8 *byte_set, *byte_set_end;
3529 int i, max, from;
3530 int range_right = -1, range_len = 3 - 1;
3531 sljit_ub *update_table = NULL;
3532 BOOL in_range;
3534 /* This is even TRUE, if both are NULL. */
3535 SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3537 for (i = 0; i < MAX_N_CHARS; i++)
3539 chars[i << 1] = NOTACHAR;
3540 chars[(i << 1) + 1] = 0;
3541 bytes[i * MAX_N_BYTES] = 0;
3544 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3546 if (max <= 1)
3547 return FALSE;
3549 for (i = 0; i < max; i++)
3551 mask = chars[(i << 1) + 1];
3552 ones[i] = ones_in_half_byte[mask & 0xf];
3553 mask >>= 4;
3554 while (mask != 0)
3556 ones[i] += ones_in_half_byte[mask & 0xf];
3557 mask >>= 4;
3561 in_range = FALSE;
3562 from = 0; /* Prevent compiler "uninitialized" warning */
3563 for (i = 0; i <= max; i++)
3565 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3567 range_len = i - from;
3568 range_right = i - 1;
3571 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3573 if (!in_range)
3575 in_range = TRUE;
3576 from = i;
3579 else if (in_range)
3580 in_range = FALSE;
3583 if (range_right >= 0)
3585 /* Since no data is consumed (see the assert in the beginning
3586 of this function), this space can be reallocated. */
3587 if (common->read_only_data)
3588 SLJIT_FREE(common->read_only_data);
3590 common->read_only_data_size += 256;
3591 common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3592 if (common->read_only_data == NULL)
3593 return TRUE;
3595 update_table = (sljit_ub *)common->read_only_data;
3596 common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3597 memset(update_table, IN_UCHARS(range_len), 256);
3599 for (i = 0; i < range_len; i++)
3601 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3602 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3603 byte_set_end = byte_set + byte_set[0];
3604 byte_set++;
3605 while (byte_set <= byte_set_end)
3607 if (update_table[*byte_set] > IN_UCHARS(i))
3608 update_table[*byte_set] = IN_UCHARS(i);
3609 byte_set++;
3614 offsets[0] = -1;
3615 /* Scan forward. */
3616 for (i = 0; i < max; i++)
3617 if (ones[i] <= 2) {
3618 offsets[0] = i;
3619 break;
3622 if (offsets[0] < 0 && range_right < 0)
3623 return FALSE;
3625 if (offsets[0] >= 0)
3627 /* Scan backward. */
3628 offsets[1] = -1;
3629 for (i = max - 1; i > offsets[0]; i--)
3630 if (ones[i] <= 2 && i != range_right)
3632 offsets[1] = i;
3633 break;
3636 /* This case is handled better by fast_forward_first_char. */
3637 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3638 return FALSE;
3640 offsets[2] = -1;
3641 /* We only search for a middle character if there is no range check. */
3642 if (offsets[1] >= 0 && range_right == -1)
3644 /* Scan from middle. */
3645 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3646 if (ones[i] <= 2)
3648 offsets[2] = i;
3649 break;
3652 if (offsets[2] == -1)
3654 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3655 if (ones[i] <= 2)
3657 offsets[2] = i;
3658 break;
3663 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3664 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3666 chars[0] = chars[offsets[0] << 1];
3667 chars[1] = chars[(offsets[0] << 1) + 1];
3668 if (offsets[2] >= 0)
3670 chars[2] = chars[offsets[2] << 1];
3671 chars[3] = chars[(offsets[2] << 1) + 1];
3673 if (offsets[1] >= 0)
3675 chars[4] = chars[offsets[1] << 1];
3676 chars[5] = chars[(offsets[1] << 1) + 1];
3680 max -= 1;
3681 if (firstline)
3683 SLJIT_ASSERT(common->first_line_end != 0);
3684 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3685 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3686 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3687 quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3688 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3689 JUMPHERE(quit);
3691 else
3692 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3694 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3695 if (range_right >= 0)
3696 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3697 #endif
3699 start = LABEL();
3700 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3702 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3704 if (range_right >= 0)
3706 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3707 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3708 #else
3709 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3710 #endif
3712 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3713 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3714 #else
3715 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3716 #endif
3717 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3718 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3721 if (offsets[0] >= 0)
3723 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3724 if (offsets[1] >= 0)
3725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3726 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3728 if (chars[1] != 0)
3729 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3730 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3731 if (offsets[2] >= 0)
3732 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3734 if (offsets[1] >= 0)
3736 if (chars[5] != 0)
3737 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3738 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3741 if (offsets[2] >= 0)
3743 if (chars[3] != 0)
3744 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3745 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3747 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3750 JUMPHERE(quit);
3752 if (firstline)
3754 if (range_right >= 0)
3755 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3756 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3757 if (range_right >= 0)
3759 quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3760 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3761 JUMPHERE(quit);
3764 else
3765 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3766 return TRUE;
3769 #undef MAX_N_CHARS
3770 #undef MAX_N_BYTES
3772 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3774 DEFINE_COMPILER;
3775 struct sljit_label *start;
3776 struct sljit_jump *quit;
3777 struct sljit_jump *found;
3778 pcre_uchar oc, bit;
3780 if (firstline)
3782 SLJIT_ASSERT(common->first_line_end != 0);
3783 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3784 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3787 start = LABEL();
3788 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3789 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3791 oc = first_char;
3792 if (caseless)
3794 oc = TABLE_GET(first_char, common->fcc, first_char);
3795 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3796 if (first_char > 127 && common->utf)
3797 oc = UCD_OTHERCASE(first_char);
3798 #endif
3800 if (first_char == oc)
3801 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3802 else
3804 bit = first_char ^ oc;
3805 if (is_powerof2(bit))
3807 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3808 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3810 else
3812 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3813 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3814 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3815 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3816 found = JUMP(SLJIT_C_NOT_ZERO);
3820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3821 JUMPTO(SLJIT_JUMP, start);
3822 JUMPHERE(found);
3823 JUMPHERE(quit);
3825 if (firstline)
3826 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3829 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3831 DEFINE_COMPILER;
3832 struct sljit_label *loop;
3833 struct sljit_jump *lastchar;
3834 struct sljit_jump *firstchar;
3835 struct sljit_jump *quit;
3836 struct sljit_jump *foundcr = NULL;
3837 struct sljit_jump *notfoundnl;
3838 jump_list *newline = NULL;
3840 if (firstline)
3842 SLJIT_ASSERT(common->first_line_end != 0);
3843 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3844 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3847 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3849 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3850 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3851 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3852 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3853 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3855 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3856 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3857 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3858 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3859 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3860 #endif
3861 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3863 loop = LABEL();
3864 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3865 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3866 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3867 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3868 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3869 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3871 JUMPHERE(quit);
3872 JUMPHERE(firstchar);
3873 JUMPHERE(lastchar);
3875 if (firstline)
3876 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3877 return;
3880 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3881 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3882 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3883 skip_char_back(common);
3885 loop = LABEL();
3886 common->ff_newline_shortcut = loop;
3888 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3889 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3890 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3891 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3892 check_newlinechar(common, common->nltype, &newline, FALSE);
3893 set_jumps(newline, loop);
3895 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3897 quit = JUMP(SLJIT_JUMP);
3898 JUMPHERE(foundcr);
3899 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3900 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3901 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3902 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3903 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3904 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3905 #endif
3906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3907 JUMPHERE(notfoundnl);
3908 JUMPHERE(quit);
3910 JUMPHERE(lastchar);
3911 JUMPHERE(firstchar);
3913 if (firstline)
3914 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3917 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3919 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3921 DEFINE_COMPILER;
3922 struct sljit_label *start;
3923 struct sljit_jump *quit;
3924 struct sljit_jump *found = NULL;
3925 jump_list *matches = NULL;
3926 #ifndef COMPILE_PCRE8
3927 struct sljit_jump *jump;
3928 #endif
3930 if (firstline)
3932 SLJIT_ASSERT(common->first_line_end != 0);
3933 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3934 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3937 start = LABEL();
3938 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3939 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3940 #ifdef SUPPORT_UTF
3941 if (common->utf)
3942 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3943 #endif
3945 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3947 #ifndef COMPILE_PCRE8
3948 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3949 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3950 JUMPHERE(jump);
3951 #endif
3952 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3953 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3954 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3955 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3956 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3957 found = JUMP(SLJIT_C_NOT_ZERO);
3960 #ifdef SUPPORT_UTF
3961 if (common->utf)
3962 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3963 #endif
3964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3965 #ifdef SUPPORT_UTF
3966 #if defined COMPILE_PCRE8
3967 if (common->utf)
3969 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3970 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3971 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3973 #elif defined COMPILE_PCRE16
3974 if (common->utf)
3976 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3977 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3978 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3979 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3980 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3981 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3983 #endif /* COMPILE_PCRE[8|16] */
3984 #endif /* SUPPORT_UTF */
3985 JUMPTO(SLJIT_JUMP, start);
3986 if (found != NULL)
3987 JUMPHERE(found);
3988 if (matches != NULL)
3989 set_jumps(matches, LABEL());
3990 JUMPHERE(quit);
3992 if (firstline)
3993 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3996 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3998 DEFINE_COMPILER;
3999 struct sljit_label *loop;
4000 struct sljit_jump *toolong;
4001 struct sljit_jump *alreadyfound;
4002 struct sljit_jump *found;
4003 struct sljit_jump *foundoc = NULL;
4004 struct sljit_jump *notfound;
4005 pcre_uint32 oc, bit;
4007 SLJIT_ASSERT(common->req_char_ptr != 0);
4008 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
4009 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4010 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
4011 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
4013 if (has_firstchar)
4014 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4015 else
4016 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4018 loop = LABEL();
4019 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4021 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4022 oc = req_char;
4023 if (caseless)
4025 oc = TABLE_GET(req_char, common->fcc, req_char);
4026 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4027 if (req_char > 127 && common->utf)
4028 oc = UCD_OTHERCASE(req_char);
4029 #endif
4031 if (req_char == oc)
4032 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4033 else
4035 bit = req_char ^ oc;
4036 if (is_powerof2(bit))
4038 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4039 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4041 else
4043 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4044 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4047 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4048 JUMPTO(SLJIT_JUMP, loop);
4050 JUMPHERE(found);
4051 if (foundoc)
4052 JUMPHERE(foundoc);
4053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
4054 JUMPHERE(alreadyfound);
4055 JUMPHERE(toolong);
4056 return notfound;
4059 static void do_revertframes(compiler_common *common)
4061 DEFINE_COMPILER;
4062 struct sljit_jump *jump;
4063 struct sljit_label *mainloop;
4065 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4066 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4067 GET_LOCAL_BASE(TMP3, 0, 0);
4069 /* Drop frames until we reach STACK_TOP. */
4070 mainloop = LABEL();
4071 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4072 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4073 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
4075 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4076 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4077 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4078 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4079 JUMPTO(SLJIT_JUMP, mainloop);
4081 JUMPHERE(jump);
4082 jump = JUMP(SLJIT_C_SIG_LESS);
4083 /* End of dropping frames. */
4084 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4086 JUMPHERE(jump);
4087 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4088 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4089 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4090 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4091 JUMPTO(SLJIT_JUMP, mainloop);
4094 static void check_wordboundary(compiler_common *common)
4096 DEFINE_COMPILER;
4097 struct sljit_jump *skipread;
4098 jump_list *skipread_list = NULL;
4099 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4100 struct sljit_jump *jump;
4101 #endif
4103 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4105 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4106 /* Get type of the previous char, and put it to LOCALS1. */
4107 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4108 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
4110 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4111 skip_char_back(common);
4112 check_start_used_ptr(common);
4113 read_char(common);
4115 /* Testing char type. */
4116 #ifdef SUPPORT_UCP
4117 if (common->use_ucp)
4119 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4120 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4121 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4122 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4123 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4125 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4126 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4127 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4128 JUMPHERE(jump);
4129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
4131 else
4132 #endif
4134 #ifndef COMPILE_PCRE8
4135 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4136 #elif defined SUPPORT_UTF
4137 /* Here LOCALS1 has already been zeroed. */
4138 jump = NULL;
4139 if (common->utf)
4140 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4141 #endif /* COMPILE_PCRE8 */
4142 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4143 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4144 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
4146 #ifndef COMPILE_PCRE8
4147 JUMPHERE(jump);
4148 #elif defined SUPPORT_UTF
4149 if (jump != NULL)
4150 JUMPHERE(jump);
4151 #endif /* COMPILE_PCRE8 */
4153 JUMPHERE(skipread);
4155 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4156 check_str_end(common, &skipread_list);
4157 peek_char(common, READ_CHAR_MAX);
4159 /* Testing char type. This is a code duplication. */
4160 #ifdef SUPPORT_UCP
4161 if (common->use_ucp)
4163 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4164 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4165 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4166 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4167 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4168 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4169 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4170 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4171 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4172 JUMPHERE(jump);
4174 else
4175 #endif
4177 #ifndef COMPILE_PCRE8
4178 /* TMP2 may be destroyed by peek_char. */
4179 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4180 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4181 #elif defined SUPPORT_UTF
4182 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4183 jump = NULL;
4184 if (common->utf)
4185 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4186 #endif
4187 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4188 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4189 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4190 #ifndef COMPILE_PCRE8
4191 JUMPHERE(jump);
4192 #elif defined SUPPORT_UTF
4193 if (jump != NULL)
4194 JUMPHERE(jump);
4195 #endif /* COMPILE_PCRE8 */
4197 set_jumps(skipread_list, LABEL());
4199 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4200 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4203 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4205 DEFINE_COMPILER;
4206 int ranges[MAX_RANGE_SIZE];
4207 pcre_uint8 bit, cbit, all;
4208 int i, byte, length = 0;
4210 bit = bits[0] & 0x1;
4211 /* All bits will be zero or one (since bit is zero or one). */
4212 all = -bit;
4214 for (i = 0; i < 256; )
4216 byte = i >> 3;
4217 if ((i & 0x7) == 0 && bits[byte] == all)
4218 i += 8;
4219 else
4221 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4222 if (cbit != bit)
4224 if (length >= MAX_RANGE_SIZE)
4225 return FALSE;
4226 ranges[length] = i;
4227 length++;
4228 bit = cbit;
4229 all = -cbit;
4231 i++;
4235 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4237 if (length >= MAX_RANGE_SIZE)
4238 return FALSE;
4239 ranges[length] = 256;
4240 length++;
4243 if (length < 0 || length > 4)
4244 return FALSE;
4246 bit = bits[0] & 0x1;
4247 if (invert) bit ^= 0x1;
4249 /* No character is accepted. */
4250 if (length == 0 && bit == 0)
4251 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4253 switch(length)
4255 case 0:
4256 /* When bit != 0, all characters are accepted. */
4257 return TRUE;
4259 case 1:
4260 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4261 return TRUE;
4263 case 2:
4264 if (ranges[0] + 1 != ranges[1])
4266 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4267 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4269 else
4270 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4271 return TRUE;
4273 case 3:
4274 if (bit != 0)
4276 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4277 if (ranges[0] + 1 != ranges[1])
4279 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4280 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4282 else
4283 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4284 return TRUE;
4287 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4288 if (ranges[1] + 1 != ranges[2])
4290 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4291 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4293 else
4294 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4295 return TRUE;
4297 case 4:
4298 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4299 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4300 && is_powerof2(ranges[2] - ranges[0]))
4302 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4303 if (ranges[2] + 1 != ranges[3])
4305 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4306 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4308 else
4309 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4310 return TRUE;
4313 if (bit != 0)
4315 i = 0;
4316 if (ranges[0] + 1 != ranges[1])
4318 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4319 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4320 i = ranges[0];
4322 else
4323 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4325 if (ranges[2] + 1 != ranges[3])
4327 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4328 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4330 else
4331 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4332 return TRUE;
4335 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4336 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4337 if (ranges[1] + 1 != ranges[2])
4339 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4340 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4342 else
4343 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4344 return TRUE;
4346 default:
4347 SLJIT_ASSERT_STOP();
4348 return FALSE;
/* Emits the shared "any newline" test routine. The routine is entered with
sljit_emit_fast_enter and left with sljit_emit_fast_return. The generated
code compares the character in TMP1 with 0x0a..0x0d and 0x85 and, on the
wide-character path, with 0x2028/0x2029, OR-ing every outcome into TMP2.
TMP1 is rebased by the first SUB (and has bit 0 set on the wide path); it is
not restored before returning. */
4352 static void check_anynewline(compiler_common *common)
4354 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4355 DEFINE_COMPILER;
4357 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase TMP1 to 0x0a so that the range 0x0a..0x0d becomes a single unsigned
"<= 3" comparison. */
4359 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4360 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4361 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* Compare against 0x85; the outcome is merged into TMP2 by the next OP_FLAGS. */
4362 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4363 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4364 #ifdef COMPILE_PCRE8
/* NOTE(review): in 8-bit builds the wide checks below appear to be emitted
only when common->utf is set; confirm the extent of the guarded block. */
4365 if (common->utf)
4367 #endif
4368 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one equality test
covers both 0x2028 and 0x2029. */
4369 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4370 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4371 #ifdef COMPILE_PCRE8
4373 #endif
4374 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison. SLJIT_SET_E is requested on the final OR,
presumably so that the caller can branch on the zero flag after the call. */
4375 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4376 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the shared "horizontal space" test routine. The routine is entered
with sljit_emit_fast_enter and left with sljit_emit_fast_return. The
generated code compares the character in TMP1 with 0x09, 0x20 and 0xa0 and,
on the wide-character path, with 0x1680, 0x180e, 0x2000..0x200A, 0x202f,
0x205f and 0x3000, OR-ing every outcome into TMP2. On the wide path TMP1 is
rebased by 0x2000 and is not restored before returning. */
4379 static void check_hspace(compiler_common *common)
4381 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
4382 DEFINE_COMPILER;
4384 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Each SUB|SET_E below is a compare-only operation (result discarded); the
OP_FLAGS that follows it records the "equal" outcome in TMP2. */
4386 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4387 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4388 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4389 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4391 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4392 #ifdef COMPILE_PCRE8
/* NOTE(review): in 8-bit builds the wide checks below appear to be emitted
only when common->utf is set; confirm the extent of the guarded block. */
4393 if (common->utf)
4395 #endif
4396 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4398 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4400 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase TMP1 to 0x2000 so that 0x2000..0x200A is one unsigned "<=" test;
the remaining constants are expressed relative to the same base. */
4401 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4402 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4403 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4405 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4407 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4408 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4409 #ifdef COMPILE_PCRE8
4411 #endif
4412 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison. SLJIT_SET_E is requested on the final OR,
presumably so that the caller can branch on the zero flag after the call. */
4413 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4415 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the shared "vertical space" test routine. The routine is entered
with sljit_emit_fast_enter and left with sljit_emit_fast_return. The
generated code compares the character in TMP1 with 0x0a..0x0d and 0x85 and,
on the wide-character path, with 0x2028/0x2029, OR-ing every outcome into
TMP2. TMP1 is rebased by the first SUB (and has bit 0 set on the wide path);
it is not restored before returning. */
4418 static void check_vspace(compiler_common *common)
4420 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
4421 DEFINE_COMPILER;
4423 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase TMP1 to 0x0a so that the range 0x0a..0x0d becomes a single unsigned
"<= 3" comparison. */
4425 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4426 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4427 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* Compare against 0x85; the outcome is merged into TMP2 by the next OP_FLAGS. */
4428 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4429 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4430 #ifdef COMPILE_PCRE8
/* NOTE(review): in 8-bit builds the wide checks below appear to be emitted
only when common->utf is set; confirm the extent of the guarded block. */
4431 if (common->utf)
4433 #endif
4434 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one equality test
covers both 0x2028 and 0x2029. */
4435 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4436 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4437 #ifdef COMPILE_PCRE8
4439 #endif
4440 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison. SLJIT_SET_E is requested on the final OR,
presumably so that the caller can branch on the zero flag after the call. */
4441 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4443 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* The comparison helpers below borrow STR_END and STACK_TOP as scratch
registers for the two characters being compared; their previous values are
saved on entry and restored before returning. */
4446 #define CHAR1 STR_END
4447 #define CHAR2 STACK_TOP
/* Emits the shared case-sensitive compare routine (fast call / fast return).
As used by the generated code: TMP1 addresses one sequence, STR_PTR the
other, and TMP2 is the remaining length, decremented by IN_UCHARS(1) per
character. The loop ends on the first mismatch or when TMP2 reaches zero.
NOTE(review): how callers read the outcome (flags / TMP2) is not visible
here; confirm against the call sites. */
4449 static void do_casefulcmp(compiler_common *common)
4451 DEFINE_COMPILER;
4452 struct sljit_jump *jump;
4453 struct sljit_label *label;
4455 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Move STR_PTR back by TMP2 before comparing. */
4456 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers that CHAR1/CHAR2 alias: one in TMP3, one in LOCALS0. */
4457 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Step both pointers back by one character; the MOVU_UCHAR loads in the loop
use an IN_UCHARS(1) offset (presumably a pre-increment "load with update" -
confirm against the sljit definition of MOVU_UCHAR). */
4459 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4460 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4462 label = LABEL();
4463 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4464 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop on the first differing character. */
4465 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4466 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4467 JUMPTO(SLJIT_C_NOT_ZERO, label);
4469 JUMPHERE(jump);
/* Undo the initial one-character step on STR_PTR and restore the borrowed
registers. */
4470 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4471 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4472 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4473 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* do_caselesscmp additionally borrows STACK_LIMIT to hold the address of the
lower-casing table (common->lcc). */
4476 #define LCC_TABLE STACK_LIMIT
/* Emits the shared caseless compare routine (fast call / fast return). It has
the same register protocol as do_casefulcmp, but each character is first
translated through the common->lcc table. In builds other than COMPILE_PCRE8
characters above 255 skip the table lookup and are compared as-is.
NOTE(review): how callers read the outcome (flags / TMP2) is not visible
here; confirm against the call sites. */
4478 static void do_caselesscmp(compiler_common *common)
4480 DEFINE_COMPILER;
4481 struct sljit_jump *jump;
4482 struct sljit_label *label;
4484 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Move STR_PTR back by TMP2 before comparing. */
4485 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the three borrowed registers: LCC_TABLE in TMP3, CHAR1 and CHAR2 in
the LOCALS0 / LOCALS1 stack slots. */
4487 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4488 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4490 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Step both pointers back by one character; the MOVU_UCHAR loads in the loop
use an IN_UCHARS(1) offset (presumably a pre-increment "load with update" -
confirm against the sljit definition of MOVU_UCHAR). */
4491 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4492 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4494 label = LABEL();
4495 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4496 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Translate CHAR1 through the table; wide builds skip the lookup for
characters above 255. */
4497 #ifndef COMPILE_PCRE8
4498 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4499 #endif
4500 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
/* Same translation for CHAR2. */
4501 #ifndef COMPILE_PCRE8
4502 JUMPHERE(jump);
4503 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4504 #endif
4505 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4506 #ifndef COMPILE_PCRE8
4507 JUMPHERE(jump);
4508 #endif
/* Leave the loop on the first differing (translated) character. */
4509 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4510 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4511 JUMPTO(SLJIT_C_NOT_ZERO, label);
4513 JUMPHERE(jump);
/* Undo the initial one-character step on STR_PTR and restore the borrowed
registers. */
4514 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4515 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4516 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4517 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4518 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* The scratch-register aliases are local to the two helpers above. */
4521 #undef LCC_TABLE
4522 #undef CHAR1
4523 #undef CHAR2
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF sequences, using the Unicode character
database. This is a plain C helper because emitting the same logic as JIT
code would be ineffective.

Arguments:
  src1   start of the first sequence
  args   JIT arguments; args->uchar_ptr is the start of the second sequence
         and args->end its limit
  end1   end of the first sequence

Returns: the position reached in the second sequence when every character of
         the first one matched; NULL on the first mismatch; (pcre_uchar*)1
         when the second sequence ran out before the first one was consumed.
*/
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *caseset;
pcre_uint32 ch1, ch2;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(ch1, src1);
  GETCHARINC(ch2, src2);
  record = GET_UCD(ch2);

  /* Identical characters, or ch1 is the simple other case of ch2. */
  if (ch1 == ch2 || ch1 == ch2 + record->other_case)
    continue;

  /* Otherwise ch1 has to appear in the caseless set of ch2. The set is
  scanned in ascending order, so an entry greater than ch1 ends the search. */
  caseset = PRIV(ucd_caseless_sets) + record->caseset;
  do
    {
    if (ch1 < *caseset)
      return NULL;
    }
  while (ch1 != *caseset++);
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
/* Emits the code that compares one pattern character (one or more code units
when SUPPORT_UTF is active) with the subject, as one step of a fixed-length
sequence described by "context".

Arguments:
  common      compiler state
  caseless    TRUE when the comparison is case-insensitive
  cc          points to the pattern character
  context     running state of the sequence: remaining length (in bytes),
              the register that holds the most recently loaded subject data
              (sourcereg; -1 before the first load) and, on the unaligned
              path, the code units gathered so far (c / oc, ucharptr)
  backtracks  list that receives the "not equal" jumps

Returns: cc advanced past the code units that were handled.
*/
4558 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4559 compare_context* context, jump_list **backtracks)
4561 DEFINE_COMPILER;
4562 unsigned int othercasebit = 0;
4563 pcre_uchar *othercasechar = NULL;
4564 #ifdef SUPPORT_UTF
4565 int utflength;
4566 #endif
/* For a caseless character that has an other case, char_get_othercase_bit
returns a packed value: the low bits describe the differing bit and the high
bits the offset of the code unit that contains it. The code below unpacks it
into othercasechar (pointer to that code unit) and othercasebit (mask). */
4568 if (caseless && char_has_othercase(common, cc))
4570 othercasebit = char_get_othercase_bit(common, cc);
4571 SLJIT_ASSERT(othercasebit);
4572 /* Extracting bit difference info. */
4573 #if defined COMPILE_PCRE8
4574 othercasechar = cc + (othercasebit >> 8);
4575 othercasebit &= 0xff;
4576 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4577 /* Note that this code only handles characters in the BMP. If there
4578 ever are characters outside the BMP whose othercase differs in only one
4579 bit from itself (there currently are none), this code will need to be
4580 revised for COMPILE_PCRE32. */
4581 othercasechar = cc + (othercasebit >> 9);
4582 if ((othercasebit & 0x100) != 0)
4583 othercasebit = (othercasebit & 0xff) << 8;
4584 else
4585 othercasebit &= 0xff;
4586 #endif /* COMPILE_PCRE[8|16|32] */
/* First call for this sequence: load the first chunk of subject data into
TMP1, using the widest load the remaining length allows, and mark TMP2 as
the current source register. */
4589 if (context->sourcereg == -1)
4591 #if defined COMPILE_PCRE8
4592 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4593 if (context->length >= 4)
4594 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4595 else if (context->length >= 2)
4596 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4597 else
4598 #endif
4599 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4600 #elif defined COMPILE_PCRE16
4601 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4602 if (context->length >= 4)
4603 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4604 else
4605 #endif
4606 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4607 #elif defined COMPILE_PCRE32
4608 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4609 #endif /* COMPILE_PCRE[8|16|32] */
4610 context->sourcereg = TMP2;
/* In UTF mode a character may span several code units; utflength counts the
code units that still have to be handled. */
4613 #ifdef SUPPORT_UTF
4614 utflength = 1;
4615 if (common->utf && HAS_EXTRALEN(*cc))
4616 utflength += GET_EXTRALEN(*cc);
4620 #endif
4622 context->length -= IN_UCHARS(1);
4623 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4625 /* Unaligned read is supported. */
/* Gather the expected code unit in context->c and its case mask in
context->oc; the comparison is emitted once a full chunk has been gathered. */
4626 if (othercasebit != 0 && othercasechar == cc)
4628 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4629 context->oc.asuchars[context->ucharptr] = othercasebit;
4631 else
4633 context->c.asuchars[context->ucharptr] = *cc;
4634 context->oc.asuchars[context->ucharptr] = 0;
4636 context->ucharptr++;
4638 #if defined COMPILE_PCRE8
4639 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4640 #else
4641 if (context->ucharptr >= 2 || context->length == 0)
4642 #endif
/* Start loading the next chunk into the register that is currently free,
then switch sourcereg to the register that holds the chunk to be compared. */
4644 if (context->length >= 4)
4645 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4646 else if (context->length >= 2)
4647 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4648 #if defined COMPILE_PCRE8
4649 else if (context->length >= 1)
4650 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4651 #endif /* COMPILE_PCRE8 */
4652 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
/* Compare the gathered chunk: OR the subject data with the case mask (when
it is non-zero) and compare the result with the expected value. */
4654 switch(context->ucharptr)
4656 case 4 / sizeof(pcre_uchar):
4657 if (context->oc.asint != 0)
4658 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4659 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4660 break;
4662 case 2 / sizeof(pcre_uchar):
4663 if (context->oc.asushort != 0)
4664 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4666 break;
4668 #ifdef COMPILE_PCRE8
4669 case 1:
4670 if (context->oc.asbyte != 0)
4671 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4672 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4673 break;
4674 #endif
4676 default:
4677 SLJIT_ASSERT_STOP();
4678 break;
4680 context->ucharptr = 0;
4683 #else
4685 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit at a time: load the next unit into the free register, switch
registers, and compare the unit that was loaded previously. */
4686 if (context->length >= 1)
4687 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4689 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4691 if (othercasebit != 0 && othercasechar == cc)
4693 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4694 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4696 else
4697 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4699 #endif
4701 cc++;
4702 #ifdef SUPPORT_UTF
4703 utflength--;
4705 while (utflength > 0);
4706 #endif
4708 return cc;
4711 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Rebases typereg so that it holds (type - value): emits an ADD or SUB of the
difference from the current typeoffset and then records value as the new
typeoffset. Relies on the locals typereg and typeoffset of the function that
uses it. Expands to more than one statement. */
4713 #define SET_TYPE_OFFSET(value) \
4714 if ((value) != typeoffset) \
4716 if ((value) < typeoffset) \
4717 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4718 else \
4719 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4721 typeoffset = (value);
/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the local
charoffset: after the expansion TMP1 holds (character - value). */
4723 #define SET_CHAR_OFFSET(value) \
4724 if ((value) != charoffset) \
4726 if ((value) < charoffset) \
4727 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4728 else \
4729 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4731 charoffset = (value);
4733 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4735 DEFINE_COMPILER;
4736 jump_list *found = NULL;
4737 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4738 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4739 struct sljit_jump *jump = NULL;
4740 pcre_uchar *ccbegin;
4741 int compares, invertcmp, numberofcmps;
4742 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4743 BOOL utf = common->utf;
4744 #endif
4746 #ifdef SUPPORT_UCP
4747 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4748 BOOL charsaved = FALSE;
4749 int typereg = TMP1, scriptreg = TMP1;
4750 const pcre_uint32 *other_cases;
4751 sljit_uw typeoffset;
4752 #endif
4754 /* Scanning the necessary info. */
4755 cc++;
4756 ccbegin = cc;
4757 compares = 0;
4758 if (cc[-1] & XCL_MAP)
4760 min = 0;
4761 cc += 32 / sizeof(pcre_uchar);
4764 while (*cc != XCL_END)
4766 compares++;
4767 if (*cc == XCL_SINGLE)
4769 cc ++;
4770 GETCHARINCTEST(c, cc);
4771 if (c > max) max = c;
4772 if (c < min) min = c;
4773 #ifdef SUPPORT_UCP
4774 needschar = TRUE;
4775 #endif
4777 else if (*cc == XCL_RANGE)
4779 cc ++;
4780 GETCHARINCTEST(c, cc);
4781 if (c < min) min = c;
4782 GETCHARINCTEST(c, cc);
4783 if (c > max) max = c;
4784 #ifdef SUPPORT_UCP
4785 needschar = TRUE;
4786 #endif
4788 #ifdef SUPPORT_UCP
4789 else
4791 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4792 cc++;
4793 if (*cc == PT_CLIST)
4795 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4796 while (*other_cases != NOTACHAR)
4798 if (*other_cases > max) max = *other_cases;
4799 if (*other_cases < min) min = *other_cases;
4800 other_cases++;
4803 else
4805 max = READ_CHAR_MAX;
4806 min = 0;
4809 switch(*cc)
4811 case PT_ANY:
4812 break;
4814 case PT_LAMP:
4815 case PT_GC:
4816 case PT_PC:
4817 case PT_ALNUM:
4818 needstype = TRUE;
4819 break;
4821 case PT_SC:
4822 needsscript = TRUE;
4823 break;
4825 case PT_SPACE:
4826 case PT_PXSPACE:
4827 case PT_WORD:
4828 case PT_PXGRAPH:
4829 case PT_PXPRINT:
4830 case PT_PXPUNCT:
4831 needstype = TRUE;
4832 needschar = TRUE;
4833 break;
4835 case PT_CLIST:
4836 case PT_UCNC:
4837 needschar = TRUE;
4838 break;
4840 default:
4841 SLJIT_ASSERT_STOP();
4842 break;
4844 cc += 2;
4846 #endif
4849 /* We are not necessary in utf mode even in 8 bit mode. */
4850 cc = ccbegin;
4851 detect_partial_match(common, backtracks);
4852 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4854 if ((cc[-1] & XCL_HASPROP) == 0)
4856 if ((cc[-1] & XCL_MAP) != 0)
4858 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4859 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4861 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4862 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4863 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4864 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4865 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4866 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4869 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4870 JUMPHERE(jump);
4872 cc += 32 / sizeof(pcre_uchar);
4874 else
4876 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4877 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4880 else if ((cc[-1] & XCL_MAP) != 0)
4882 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4883 #ifdef SUPPORT_UCP
4884 charsaved = TRUE;
4885 #endif
4886 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4888 #ifdef COMPILE_PCRE8
4889 SLJIT_ASSERT(common->utf);
4890 #endif
4891 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4893 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4894 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4895 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4896 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4897 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4898 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4900 JUMPHERE(jump);
4903 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4904 cc += 32 / sizeof(pcre_uchar);
4907 #ifdef SUPPORT_UCP
4908 /* Simple register allocation. TMP1 is preferred if possible. */
4909 if (needstype || needsscript)
4911 if (needschar && !charsaved)
4912 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4913 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4914 if (needschar)
4916 if (needstype)
4918 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4919 typereg = RETURN_ADDR;
4922 if (needsscript)
4923 scriptreg = TMP3;
4924 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4926 else if (needstype && needsscript)
4927 scriptreg = TMP3;
4928 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4930 if (needsscript)
4932 if (scriptreg == TMP1)
4934 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4935 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4937 else
4939 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4940 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4941 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4945 #endif
4947 /* Generating code. */
4948 charoffset = 0;
4949 numberofcmps = 0;
4950 #ifdef SUPPORT_UCP
4951 typeoffset = 0;
4952 #endif
4954 while (*cc != XCL_END)
4956 compares--;
4957 invertcmp = (compares == 0 && list != backtracks);
4958 jump = NULL;
4960 if (*cc == XCL_SINGLE)
4962 cc ++;
4963 GETCHARINCTEST(c, cc);
4965 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4967 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4968 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4969 numberofcmps++;
4971 else if (numberofcmps > 0)
4973 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4974 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4975 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4976 numberofcmps = 0;
4978 else
4980 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4981 numberofcmps = 0;
4984 else if (*cc == XCL_RANGE)
4986 cc ++;
4987 GETCHARINCTEST(c, cc);
4988 SET_CHAR_OFFSET(c);
4989 GETCHARINCTEST(c, cc);
4991 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4993 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4994 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4995 numberofcmps++;
4997 else if (numberofcmps > 0)
4999 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5000 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5001 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5002 numberofcmps = 0;
5004 else
5006 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5007 numberofcmps = 0;
5010 #ifdef SUPPORT_UCP
5011 else
5013 if (*cc == XCL_NOTPROP)
5014 invertcmp ^= 0x1;
5015 cc++;
5016 switch(*cc)
5018 case PT_ANY:
5019 if (list != backtracks)
5021 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5022 continue;
5024 else if (cc[-1] == XCL_NOTPROP)
5025 continue;
5026 jump = JUMP(SLJIT_JUMP);
5027 break;
5029 case PT_LAMP:
5030 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5031 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5032 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5033 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5034 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5035 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5036 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5037 break;
5039 case PT_GC:
5040 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5041 SET_TYPE_OFFSET(c);
5042 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5043 break;
5045 case PT_PC:
5046 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5047 break;
5049 case PT_SC:
5050 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
5051 break;
5053 case PT_SPACE:
5054 case PT_PXSPACE:
5055 SET_CHAR_OFFSET(9);
5056 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5057 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5059 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5060 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5062 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5063 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5065 SET_TYPE_OFFSET(ucp_Zl);
5066 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5067 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5068 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5069 break;
5071 case PT_WORD:
5072 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5073 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5074 /* Fall through. */
5076 case PT_ALNUM:
5077 SET_TYPE_OFFSET(ucp_Ll);
5078 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5079 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
5080 SET_TYPE_OFFSET(ucp_Nd);
5081 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5082 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5083 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5084 break;
5086 case PT_CLIST:
5087 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5089 /* At least three characters are required.
5090 Otherwise this case would be handled by the normal code path. */
5091 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5092 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5094 /* Optimizing character pairs, if their difference is power of 2. */
5095 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5097 if (charoffset == 0)
5098 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5099 else
5101 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5102 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5104 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5105 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5106 other_cases += 2;
5108 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5110 if (charoffset == 0)
5111 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5112 else
5114 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5115 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5117 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5118 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5120 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5121 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5123 other_cases += 3;
5125 else
5127 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5128 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5131 while (*other_cases != NOTACHAR)
5133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5134 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5136 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5137 break;
5139 case PT_UCNC:
5140 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5141 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5142 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5143 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5144 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5145 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5147 SET_CHAR_OFFSET(0xa0);
5148 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5149 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5150 SET_CHAR_OFFSET(0);
5151 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5152 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5153 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5154 break;
5156 case PT_PXGRAPH:
5157 /* C and Z groups are the farthest two groups. */
5158 SET_TYPE_OFFSET(ucp_Ll);
5159 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5160 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5162 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5164 /* In case of ucp_Cf, we overwrite the result. */
5165 SET_CHAR_OFFSET(0x2066);
5166 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5167 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5169 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5170 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5172 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5173 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5175 JUMPHERE(jump);
5176 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5177 break;
5179 case PT_PXPRINT:
5180 /* C and Z groups are the farthest two groups. */
5181 SET_TYPE_OFFSET(ucp_Ll);
5182 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5183 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5185 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5186 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5188 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5190 /* In case of ucp_Cf, we overwrite the result. */
5191 SET_CHAR_OFFSET(0x2066);
5192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5193 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5195 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5198 JUMPHERE(jump);
5199 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5200 break;
5202 case PT_PXPUNCT:
5203 SET_TYPE_OFFSET(ucp_Sc);
5204 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5205 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5207 SET_CHAR_OFFSET(0);
5208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5209 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5211 SET_TYPE_OFFSET(ucp_Pc);
5212 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5213 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5214 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5215 break;
5217 cc += 2;
5219 #endif
5221 if (jump != NULL)
5222 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5225 if (found != NULL)
5226 set_jumps(found, LABEL());
5229 #undef SET_TYPE_OFFSET
5230 #undef SET_CHAR_OFFSET
5232 #endif
5234 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5236 DEFINE_COMPILER;
5237 int length;
5238 unsigned int c, oc, bit;
5239 compare_context context;
5240 struct sljit_jump *jump[4];
5241 jump_list *end_list;
5242 #ifdef SUPPORT_UTF
5243 struct sljit_label *label;
5244 #ifdef SUPPORT_UCP
5245 pcre_uchar propdata[5];
5246 #endif
5247 #endif /* SUPPORT_UTF */
5249 switch(type)
5251 case OP_SOD:
5252 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5254 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5255 return cc;
5257 case OP_SOM:
5258 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5260 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5261 return cc;
5263 case OP_NOT_WORD_BOUNDARY:
5264 case OP_WORD_BOUNDARY:
5265 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5266 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5267 return cc;
5269 case OP_NOT_DIGIT:
5270 case OP_DIGIT:
5271 /* Digits are usually 0-9, so it is worth to optimize them. */
5272 detect_partial_match(common, backtracks);
5273 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5274 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5275 read_char7_type(common, type == OP_NOT_DIGIT);
5276 else
5277 #endif
5278 read_char8_type(common, type == OP_NOT_DIGIT);
5279 /* Flip the starting bit in the negative case. */
5280 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5281 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5282 return cc;
5284 case OP_NOT_WHITESPACE:
5285 case OP_WHITESPACE:
5286 detect_partial_match(common, backtracks);
5287 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5288 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5289 read_char7_type(common, type == OP_NOT_WHITESPACE);
5290 else
5291 #endif
5292 read_char8_type(common, type == OP_NOT_WHITESPACE);
5293 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5294 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5295 return cc;
5297 case OP_NOT_WORDCHAR:
5298 case OP_WORDCHAR:
5299 detect_partial_match(common, backtracks);
5300 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5301 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5302 read_char7_type(common, type == OP_NOT_WORDCHAR);
5303 else
5304 #endif
5305 read_char8_type(common, type == OP_NOT_WORDCHAR);
5306 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5307 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5308 return cc;
5310 case OP_ANY:
5311 detect_partial_match(common, backtracks);
5312 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5313 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5315 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5316 end_list = NULL;
5317 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5318 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5319 else
5320 check_str_end(common, &end_list);
5322 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5323 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5324 set_jumps(end_list, LABEL());
5325 JUMPHERE(jump[0]);
5327 else
5328 check_newlinechar(common, common->nltype, backtracks, TRUE);
5329 return cc;
5331 case OP_ALLANY:
5332 detect_partial_match(common, backtracks);
5333 #ifdef SUPPORT_UTF
5334 if (common->utf)
5336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5338 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5339 #if defined COMPILE_PCRE8
5340 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5341 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5342 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5343 #elif defined COMPILE_PCRE16
5344 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5345 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5347 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5348 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5349 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5350 #endif
5351 JUMPHERE(jump[0]);
5352 #endif /* COMPILE_PCRE[8|16] */
5353 return cc;
5355 #endif
5356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5357 return cc;
5359 case OP_ANYBYTE:
5360 detect_partial_match(common, backtracks);
5361 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5362 return cc;
5364 #ifdef SUPPORT_UTF
5365 #ifdef SUPPORT_UCP
5366 case OP_NOTPROP:
5367 case OP_PROP:
5368 propdata[0] = XCL_HASPROP;
5369 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5370 propdata[2] = cc[0];
5371 propdata[3] = cc[1];
5372 propdata[4] = XCL_END;
5373 compile_xclass_matchingpath(common, propdata, backtracks);
5374 return cc + 2;
5375 #endif
5376 #endif
5378 case OP_ANYNL:
5379 detect_partial_match(common, backtracks);
5380 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5381 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5382 /* We don't need to handle soft partial matching case. */
5383 end_list = NULL;
5384 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5385 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5386 else
5387 check_str_end(common, &end_list);
5388 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5389 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5390 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5391 jump[2] = JUMP(SLJIT_JUMP);
5392 JUMPHERE(jump[0]);
5393 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5394 set_jumps(end_list, LABEL());
5395 JUMPHERE(jump[1]);
5396 JUMPHERE(jump[2]);
5397 return cc;
5399 case OP_NOT_HSPACE:
5400 case OP_HSPACE:
5401 detect_partial_match(common, backtracks);
5402 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5403 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5404 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5405 return cc;
5407 case OP_NOT_VSPACE:
5408 case OP_VSPACE:
5409 detect_partial_match(common, backtracks);
5410 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5411 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5412 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5413 return cc;
5415 #ifdef SUPPORT_UCP
5416 case OP_EXTUNI:
5417 detect_partial_match(common, backtracks);
5418 read_char(common);
5419 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5421 /* Optimize register allocation: use a real register. */
5422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5423 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5425 label = LABEL();
5426 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5427 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5428 read_char(common);
5429 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5430 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5431 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5433 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5434 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5435 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5436 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5437 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5438 JUMPTO(SLJIT_C_NOT_ZERO, label);
5440 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5441 JUMPHERE(jump[0]);
5442 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5444 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5446 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5447 /* Since we successfully read a char above, partial matching must occur. */
5448 check_partial(common, TRUE);
5449 JUMPHERE(jump[0]);
5451 return cc;
5452 #endif
5454 case OP_EODN:
5455 /* Requires rather complex checks. */
5456 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5457 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5459 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5461 if (common->mode == JIT_COMPILE)
5462 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5463 else
5465 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5466 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5467 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5468 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5469 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5470 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5471 check_partial(common, TRUE);
5472 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5473 JUMPHERE(jump[1]);
5475 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5476 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5477 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5479 else if (common->nltype == NLTYPE_FIXED)
5481 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5483 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5484 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5486 else
5488 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5489 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5490 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5491 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5492 jump[2] = JUMP(SLJIT_C_GREATER);
5493 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5494 /* Equal. */
5495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5496 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5497 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5499 JUMPHERE(jump[1]);
5500 if (common->nltype == NLTYPE_ANYCRLF)
5502 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5503 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5504 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5506 else
5508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5509 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5510 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5511 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5512 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5513 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5515 JUMPHERE(jump[2]);
5516 JUMPHERE(jump[3]);
5518 JUMPHERE(jump[0]);
5519 check_partial(common, FALSE);
5520 return cc;
5522 case OP_EOD:
5523 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5524 check_partial(common, FALSE);
5525 return cc;
5527 case OP_CIRC:
5528 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5529 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5530 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5531 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5532 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5533 return cc;
5535 case OP_CIRCM:
5536 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5537 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5538 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5539 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5540 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5541 jump[0] = JUMP(SLJIT_JUMP);
5542 JUMPHERE(jump[1]);
5544 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5545 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5547 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5548 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5549 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5550 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5551 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5552 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5554 else
5556 skip_char_back(common);
5557 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5558 check_newlinechar(common, common->nltype, backtracks, FALSE);
5560 JUMPHERE(jump[0]);
5561 return cc;
5563 case OP_DOLL:
5564 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5565 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5566 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5568 if (!common->endonly)
5569 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5570 else
5572 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5573 check_partial(common, FALSE);
5575 return cc;
5577 case OP_DOLLM:
5578 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5579 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5580 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5581 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5582 check_partial(common, FALSE);
5583 jump[0] = JUMP(SLJIT_JUMP);
5584 JUMPHERE(jump[1]);
5586 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5588 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5589 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5590 if (common->mode == JIT_COMPILE)
5591 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5592 else
5594 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5595 /* STR_PTR = STR_END - IN_UCHARS(1) */
5596 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5597 check_partial(common, TRUE);
5598 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5599 JUMPHERE(jump[1]);
5602 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5603 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5604 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5606 else
5608 peek_char(common, common->nlmax);
5609 check_newlinechar(common, common->nltype, backtracks, FALSE);
5611 JUMPHERE(jump[0]);
5612 return cc;
5614 case OP_CHAR:
5615 case OP_CHARI:
5616 length = 1;
5617 #ifdef SUPPORT_UTF
5618 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5619 #endif
5620 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5622 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5623 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5625 context.length = IN_UCHARS(length);
5626 context.sourcereg = -1;
5627 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5628 context.ucharptr = 0;
5629 #endif
5630 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5633 detect_partial_match(common, backtracks);
5634 #ifdef SUPPORT_UTF
5635 if (common->utf)
5637 GETCHAR(c, cc);
5639 else
5640 #endif
5641 c = *cc;
5643 if (type == OP_CHAR || !char_has_othercase(common, cc))
5645 read_char_range(common, c, c, FALSE);
5646 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5647 return cc + length;
5649 oc = char_othercase(common, c);
5650 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5651 bit = c ^ oc;
5652 if (is_powerof2(bit))
5654 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5655 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5656 return cc + length;
5658 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5659 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5660 JUMPHERE(jump[0]);
5661 return cc + length;
5663 case OP_NOT:
5664 case OP_NOTI:
5665 detect_partial_match(common, backtracks);
5666 length = 1;
5667 #ifdef SUPPORT_UTF
5668 if (common->utf)
5670 #ifdef COMPILE_PCRE8
5671 c = *cc;
5672 if (c < 128)
5674 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5675 if (type == OP_NOT || !char_has_othercase(common, cc))
5676 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5677 else
5679 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5680 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5681 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5683 /* Skip the variable-length character. */
5684 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5685 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5686 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5687 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5688 JUMPHERE(jump[0]);
5689 return cc + 1;
5691 else
5692 #endif /* COMPILE_PCRE8 */
5694 GETCHARLEN(c, cc, length);
5697 else
5698 #endif /* SUPPORT_UTF */
5699 c = *cc;
5701 if (type == OP_NOT || !char_has_othercase(common, cc))
5703 read_char_range(common, c, c, TRUE);
5704 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5706 else
5708 oc = char_othercase(common, c);
5709 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5710 bit = c ^ oc;
5711 if (is_powerof2(bit))
5713 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5714 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5716 else
5718 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5719 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5722 return cc + length;
5724 case OP_CLASS:
5725 case OP_NCLASS:
5726 detect_partial_match(common, backtracks);
5728 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5729 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5730 read_char_range(common, 0, bit, type == OP_NCLASS);
5731 #else
5732 read_char_range(common, 0, 255, type == OP_NCLASS);
5733 #endif
5735 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5736 return cc + 32 / sizeof(pcre_uchar);
5738 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5739 jump[0] = NULL;
5740 if (common->utf)
5742 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5743 if (type == OP_CLASS)
5745 add_jump(compiler, backtracks, jump[0]);
5746 jump[0] = NULL;
5749 #elif !defined COMPILE_PCRE8
5750 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5751 if (type == OP_CLASS)
5753 add_jump(compiler, backtracks, jump[0]);
5754 jump[0] = NULL;
5756 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5758 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5759 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5760 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5761 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5762 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5763 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5765 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5766 if (jump[0] != NULL)
5767 JUMPHERE(jump[0]);
5768 #endif
5770 return cc + 32 / sizeof(pcre_uchar);
5772 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5773 case OP_XCLASS:
5774 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5775 return cc + GET(cc, 0) - 1;
5776 #endif
5778 case OP_REVERSE:
5779 length = GET(cc, 0);
5780 if (length == 0)
5781 return cc + LINK_SIZE;
5782 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5783 #ifdef SUPPORT_UTF
5784 if (common->utf)
5786 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5788 label = LABEL();
5789 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5790 skip_char_back(common);
5791 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5792 JUMPTO(SLJIT_C_NOT_ZERO, label);
5794 else
5795 #endif
5797 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5798 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5799 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5801 check_start_used_ptr(common);
5802 return cc + LINK_SIZE;
5804 SLJIT_ASSERT_STOP();
5805 return cc;
/* Emits the matching path for the literal-character opcode(s) starting at cc.

   common      JIT compiler state.
   cc          points at the first opcode to compile.
   ccend       scanning for further literal opcodes stops once cc reaches it.
   backtracks  jump list that receives every "no match" jump emitted here.

   Returns the opcode pointer handed back by byte_sequence_compare() or by
   compile_char1_matchingpath(), whichever path is taken.

   NOTE(review): in this view the lines holding only braces (and the "do" that
   opens the scanning loop) are not present, so the nesting described in the
   comments below is inferred from the statement order - confirm against the
   full file. */
5808 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5810 /* This function consumes at least one input character. */
5811 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5812 DEFINE_COMPILER;
5813 pcre_uchar *ccbegin = cc;
5814 compare_context context;
5815 int size;
/* Scanning pass: walk over consecutive OP_CHAR / OP_CHARI opcodes and add
   IN_UCHARS(size) for each of them to context.length. */
5817 context.length = 0;
5820 if (cc >= ccend)
5821 break;
5823 if (*cc == OP_CHAR)
5825 size = 1;
5826 #ifdef SUPPORT_UTF
5827 if (common->utf && HAS_EXTRALEN(cc[1]))
5828 size += GET_EXTRALEN(cc[1]);
5829 #endif
5831 else if (*cc == OP_CHARI)
5833 size = 1;
5834 #ifdef SUPPORT_UTF
5835 if (common->utf)
/* A caseless character joins the run only when it has no other case or when
   char_get_othercase_bit() returns non-zero; otherwise size = 0 ends the run. */
5837 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5838 size = 0;
5839 else if (HAS_EXTRALEN(cc[1]))
5840 size += GET_EXTRALEN(cc[1]);
5842 else
5843 #endif
5844 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5845 size = 0;
/* Any other opcode terminates the run. */
5847 else
5848 size = 0;
5850 cc += 1 + size;
5851 context.length += IN_UCHARS(size);
/* The scan also stops once context.length has grown beyond 128. */
5853 while (size > 0 && context.length <= 128);
/* Emission pass: restart from the first opcode. */
5855 cc = ccbegin;
5856 if (context.length > 0)
5858 /* We have a fixed-length byte sequence. */
/* Advance STR_PTR by the whole run and emit a single end-of-subject check. */
5859 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5860 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5862 context.sourcereg = -1;
5863 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5864 context.ucharptr = 0;
5865 #endif
/* One byte_sequence_compare() call per opcode until context.length drops to 0.
   Presumably the callee decrements context.length; its definition is not
   visible here - verify. */
5866 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5867 return cc;
5870 /* A non-fixed length character will be checked if length == 0. */
5871 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5874 /* Forward declarations. Both functions are defined outside the visible part
   of this file; compile_matchingpath() is already called by
   compile_recurse_matchingpath() below, so it must be declared first. */
5875 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5876 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
/* PUSH_BACKTRACK(size, ccstart, error)
   Allocates `size` bytes with sljit_alloc_memory(), zero-fills them and pushes
   the new record onto the parent's backtrack chain: its prev field takes the
   old parent->top, its cc field takes `ccstart`, and parent->top is updated.
   If sljit_get_compiler_error() reports an error after the allocation, the
   enclosing function returns `error`.
   The expansion uses the identifiers `compiler`, `backtrack` and `parent`,
   which must be in scope at the point of use. */
5878 #define PUSH_BACKTRACK(size, ccstart, error) \
5879 do \
5881 backtrack = sljit_alloc_memory(compiler, (size)); \
5882 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5883 return error; \
5884 memset(backtrack, 0, size); \
5885 backtrack->prev = parent->top; \
5886 backtrack->cc = (ccstart); \
5887 parent->top = backtrack; \
5889 while (0)
/* PUSH_BACKTRACK_NOVALUE(size, ccstart)
   Same steps as PUSH_BACKTRACK, for use in functions returning void: when
   sljit_get_compiler_error() reports an error, the enclosing function returns
   without a value. Uses `compiler`, `backtrack` and `parent` from the
   surrounding scope. */
5891 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5892 do \
5894 backtrack = sljit_alloc_memory(compiler, (size)); \
5895 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5896 return; \
5897 memset(backtrack, 0, size); \
5898 backtrack->prev = parent->top; \
5899 backtrack->cc = (ccstart); \
5900 parent->top = backtrack; \
5902 while (0)
/* Views the local `backtrack` pointer as a pointer to the given, more specific
   backtrack record type (plain cast, no checking). */
5904 #define BACKTRACK_AS(type) ((type *)backtrack)
/* Emits code that selects, at run time, one capture group among the entries
   that share the name referenced by an OP_DNREF / OP_DNREFI opcode.

   common      JIT compiler state (name_table, name_entry_size, jscript_compat
               are read).
   cc          points at the OP_DNREF / OP_DNREFI opcode (asserted below).
   backtracks  if non-NULL (and jscript_compat is off), receives a jump taken
               when the last candidate's OVECTOR entry equals OVECTOR(1).

   On exit of the emitted code TMP2 holds the address of the selected OVECTOR
   entry; TMP1 is clobbered (it holds the OVECTOR(1) value).

   NOTE(review): brace-only lines are missing in this view; the while loop is
   assumed to cover the four statements that follow it. */
5906 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5908 /* The OVECTOR offset goes to TMP2. */
5909 DEFINE_COMPILER;
/* Second 2-unit operand: number of name-table entries to examine. */
5910 int count = GET2(cc, 1 + IMM2_SIZE);
/* First 2-unit operand: index of the first name-table entry. */
5911 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5912 unsigned int offset;
5913 jump_list *found = NULL;
5915 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
/* TMP1 = OVECTOR(1), the value every candidate start is compared against
   (presumably the "unset group" marker - see the comment in
   compile_ref_matchingpath). */
5917 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
/* All entries but the last: leave via `found` as soon as an entry differs from
   TMP1. The last entry is handled after the loop. */
5919 count--;
5920 while (count-- > 0)
5922 offset = GET2(slot, 0) << 1;
5923 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5924 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5925 slot += common->name_entry_size;
/* Last entry: TMP2 is loaded unconditionally; the equality check is emitted
   only when the caller supplied a backtrack list and jscript_compat is off. */
5928 offset = GET2(slot, 0) << 1;
5929 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5930 if (backtracks != NULL && !common->jscript_compat)
5931 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5933 set_jumps(found, LABEL());
/* Emits the code that matches a single back reference at STR_PTR.

   common      JIT compiler state.
   cc          points at the reference opcode: OP_REF / OP_REFI (numbered
               group, offset read from the opcode) or OP_DNREF / OP_DNREFI
               (duplicate name; the caller must already have the address of
               the selected OVECTOR entry in TMP2, see compile_dnref_search).
   backtracks  jump list that receives every "no match" jump.
   withchecks  when TRUE, emit the zero-length test and - for numbered
               references, unless jscript_compat is set - the comparison of
               the group start against OVECTOR(1).
   emptyfail   decides what the zero-length jump does: TRUE sends it to
               `backtracks`, FALSE lets it continue after the emitted code.

   Case sensitivity is decided per opcode family: OP_REF and OP_DNREF compare
   casefully, OP_REFI and OP_DNREFI caselessly. The previous code tested only
   OP_REF / OP_REFI, which made OP_DNREF use the caseless helper and kept
   OP_DNREFI away from the UTF caseless path (whose non-`ref` branch was
   therefore unreachable).

   NOTE(review): brace-only lines are not present in this view, so block
   boundaries are inferred from statement order. */
5936 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5938 DEFINE_COMPILER;
5939 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5940 int offset = 0;
5941 struct sljit_jump *jump = NULL;
5942 struct sljit_jump *partial;
5943 struct sljit_jump *nopartial;
/* TMP1 = start of the referenced text. */
5945 if (ref)
5947 offset = GET2(cc, 1) << 1;
5948 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5949 /* OVECTOR(1) contains the "string begin - 1" constant. */
5950 if (withchecks && !common->jscript_compat)
5951 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5953 else
5954 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5956 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* UTF caseless comparison is done by the C helper do_utf_caselesscmp(); both
   caseless opcodes (numbered and duplicate-name) take this path. */
5957 if (common->utf && (*cc == OP_REFI || *cc == OP_DNREFI))
5959 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
/* TMP2 = end of the referenced text. */
5960 if (ref)
5961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5962 else
5963 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
/* Zero-length reference: start == end. */
5965 if (withchecks)
5966 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5968 /* Needed to save important temporary registers. */
5969 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5970 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5972 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5973 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value handling: 0 always backtracks; 1 backtracks in normal mode and
   goes through check_partial() first in the partial modes; any other value
   becomes the new STR_PTR. */
5974 if (common->mode == JIT_COMPILE)
5975 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5976 else
5978 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5979 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5980 check_partial(common, FALSE);
5981 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5982 JUMPHERE(nopartial);
5984 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5986 else
5987 #endif /* SUPPORT_UTF && SUPPORT_UCP */
/* TMP2 = end - start, i.e. the length of the referenced text; the flags record
   whether it is zero. */
5989 if (ref)
5990 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5991 else
5992 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5994 if (withchecks)
5995 jump = JUMP(SLJIT_C_ZERO);
/* Advance first, then test against STR_END. In normal mode running past the
   end is a plain backtrack; in the partial modes `partial` is resolved below. */
5997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5998 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5999 if (common->mode == JIT_COMPILE)
6000 add_jump(compiler, backtracks, partial);
/* Caseful helper for OP_REF / OP_DNREF, caseless helper otherwise. A non-zero
   TMP2 after the call is treated as a mismatch. */
6002 add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6003 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6005 if (common->mode != JIT_COMPILE)
6007 nopartial = JUMP(SLJIT_JUMP);
6008 JUMPHERE(partial);
6009 /* TMP2 -= STR_END - STR_PTR */
/* STR_PTR was already advanced by TMP2, so the result is the amount of subject
   that was left before the reference was applied. */
6010 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6011 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: skip the helper call. */
6012 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6013 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6014 add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6015 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6016 JUMPHERE(partial);
/* The available prefix matched: report through check_partial(), then backtrack. */
6017 check_partial(common, FALSE);
6018 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6019 JUMPHERE(nopartial);
/* Resolve the zero-length jump according to emptyfail. */
6023 if (jump != NULL)
6025 if (emptyfail)
6026 add_jump(compiler, backtracks, jump);
6027 else
6028 JUMPHERE(jump);
/* Emits the matching path for a back reference that is followed by a repeat
   operator (OP_CRSTAR ... OP_CRMINRANGE).

   common  JIT compiler state.
   cc      points at the reference opcode. For OP_REF / OP_REFI the OVECTOR
           offset is read from the opcode; for any other opcode the entry is
           located at run time with compile_dnref_search().
   parent  backtrack record whose `top` chain receives the new
           iterator_backtrack pushed here.

   Returns the opcode pointer just past the repeat operator and its operands,
   or NULL when PUSH_BACKTRACK detects a compiler error.

   max == 0 is used below as "no upper limit" (the loop then jumps back
   unconditionally).

   NOTE(review): brace-only lines are missing in this view. The first
   "return cc" is assumed to close the "if (!minimize)" block, with the
   minimizing code following it - confirm against the full file. */
6032 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6034 DEFINE_COMPILER;
6035 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6036 backtrack_common *backtrack;
6037 pcre_uchar type;
6038 int offset = 0;
6039 struct sljit_label *label;
6040 struct sljit_jump *zerolength;
6041 struct sljit_jump *jump = NULL;
6042 pcre_uchar *ccbegin = cc;
6043 int min = 0, max = 0;
6044 BOOL minimize;
6046 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
/* The non-`ref` opcodes carry one more 2-unit operand (compile_dnref_search
   reads it), so skip it before locating the repeat opcode. */
6048 if (ref)
6049 offset = GET2(cc, 1) << 1;
6050 else
6051 cc += IMM2_SIZE;
6052 type = cc[1 + IMM2_SIZE];
/* The minimizing variant of each repeat opcode is the odd one. */
6054 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6055 minimize = (type & 0x1) != 0;
/* Decode min/max and move cc past the repeat operator. */
6056 switch(type)
6058 case OP_CRSTAR:
6059 case OP_CRMINSTAR:
6060 min = 0;
6061 max = 0;
6062 cc += 1 + IMM2_SIZE + 1;
6063 break;
6064 case OP_CRPLUS:
6065 case OP_CRMINPLUS:
6066 min = 1;
6067 max = 0;
6068 cc += 1 + IMM2_SIZE + 1;
6069 break;
6070 case OP_CRQUERY:
6071 case OP_CRMINQUERY:
6072 min = 0;
6073 max = 1;
6074 cc += 1 + IMM2_SIZE + 1;
6075 break;
6076 case OP_CRRANGE:
6077 case OP_CRMINRANGE:
6078 min = GET2(cc, 1 + IMM2_SIZE + 1);
6079 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6080 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6081 break;
6082 default:
6083 SLJIT_ASSERT_STOP();
6084 break;
/* ---- Non-minimizing repeat ---- */
6087 if (!minimize)
6089 if (min == 0)
/* Two stack slots: STACK(0) = current STR_PTR, STACK(1) = 0. */
6091 allocate_stack(common, 2);
6092 if (ref)
6093 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6095 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6096 /* Temporary release of STR_PTR. */
6097 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6098 /* Handles both invalid and empty cases. Since the minimum repeat
6099 is zero, the invalid case is basically the same as an empty case. */
6100 if (ref)
6101 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6102 else
/* Duplicate-name reference: find the entry (address in TMP2) and keep that
   address in POSSESSIVE1 so the loop below can reload it. */
6104 compile_dnref_search(common, ccbegin, NULL);
6105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6107 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6109 /* Restore if not zero length. */
6110 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6112 else
/* min >= 1: one stack slot holding 0. */
6114 allocate_stack(common, 1);
6115 if (ref)
6116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6117 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6118 if (ref)
/* Backtrack when the group start equals OVECTOR(1); then test for a
   zero-length group (start == end). */
6120 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6121 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6123 else
6125 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6127 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6128 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
/* A repeat counter is only needed for bounded ranges; it lives in POSSESSIVE0. */
6132 if (min > 1 || max > 1)
6133 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
/* Loop head: match the reference once per iteration. */
6135 label = LABEL();
6136 if (!ref)
6137 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6138 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6140 if (min > 1 || max > 1)
/* counter++ */
6142 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6143 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without recording a backtrack position. */
6145 if (min > 1)
6146 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
6147 if (max > 1)
/* Stop at the maximum; otherwise push STR_PTR and go round again. */
6149 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6150 allocate_stack(common, 1);
6151 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6152 JUMPTO(SLJIT_JUMP, label);
6153 JUMPHERE(jump);
6157 if (max == 0)
6159 /* Includes min > 1 case as well. */
6160 allocate_stack(common, 1);
6161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6162 JUMPTO(SLJIT_JUMP, label);
/* Zero-length references skip the loop and continue here. */
6165 JUMPHERE(zerolength);
6166 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6168 count_match(common);
6169 return cc;
/* ---- Minimizing repeat ----
   Stack layout: STACK(0) = 0 / last STR_PTR, STACK(1) = repeat counter (not
   initialised for OP_CRMINSTAR), and for duplicate-name references STACK(2) =
   address of the selected OVECTOR entry. */
6172 allocate_stack(common, ref ? 2 : 3);
6173 if (ref)
6174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6176 if (type != OP_CRMINSTAR)
6177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6179 if (min == 0)
6181 /* Handles both invalid and empty cases. Since the minimum repeat
6182 is zero, the invalid case is basically the same as an empty case. */
6183 if (ref)
6184 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6185 else
6187 compile_dnref_search(common, ccbegin, NULL);
6188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6190 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6192 /* Length is non-zero, we can match real repeats. */
6193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first pass skips the match code below. */
6194 jump = JUMP(SLJIT_JUMP);
6196 else
6198 if (ref)
6200 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6201 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6203 else
6205 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6206 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6208 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
/* Entry point recorded in the backtrack record for matching one more
   repetition. */
6212 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Give up once the counter has reached the maximum. */
6213 if (max > 0)
6214 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6216 if (!ref)
6217 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6218 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6221 if (min > 1)
/* counter++, and keep matching until the minimum is reached. */
6223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6224 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6226 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6228 else if (max > 0)
6229 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6231 if (jump != NULL)
6232 JUMPHERE(jump);
6233 JUMPHERE(zerolength);
6235 count_match(common);
6236 return cc;
6239 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6241 DEFINE_COMPILER;
6242 backtrack_common *backtrack;
6243 recurse_entry *entry = common->entries;
6244 recurse_entry *prev = NULL;
6245 sljit_sw start = GET(cc, 1);
6246 pcre_uchar *start_cc;
6247 BOOL needs_control_head;
6249 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6251 /* Inlining simple patterns. */
6252 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6254 start_cc = common->start + start;
6255 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6256 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6257 return cc + 1 + LINK_SIZE;
6260 while (entry != NULL)
6262 if (entry->start == start)
6263 break;
6264 prev = entry;
6265 entry = entry->next;
6268 if (entry == NULL)
6270 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6271 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6272 return NULL;
6273 entry->next = NULL;
6274 entry->entry = NULL;
6275 entry->calls = NULL;
6276 entry->start = start;
6278 if (prev != NULL)
6279 prev->next = entry;
6280 else
6281 common->entries = entry;
6284 if (common->has_set_som && common->mark_ptr != 0)
6286 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6287 allocate_stack(common, 2);
6288 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6292 else if (common->has_set_som || common->mark_ptr != 0)
6294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6295 allocate_stack(common, 1);
6296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6299 if (entry->entry == NULL)
6300 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6301 else
6302 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6303 /* Leave if the match is failed. */
6304 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6305 return cc + 1 + LINK_SIZE;
6308 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6310 const pcre_uchar *begin = arguments->begin;
6311 int *offset_vector = arguments->offsets;
6312 int offset_count = arguments->offset_count;
6313 int i;
6315 if (PUBL(callout) == NULL)
6316 return 0;
6318 callout_block->version = 2;
6319 callout_block->callout_data = arguments->callout_data;
6321 /* Offsets in subject. */
6322 callout_block->subject_length = arguments->end - arguments->begin;
6323 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6324 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6325 #if defined COMPILE_PCRE8
6326 callout_block->subject = (PCRE_SPTR)begin;
6327 #elif defined COMPILE_PCRE16
6328 callout_block->subject = (PCRE_SPTR16)begin;
6329 #elif defined COMPILE_PCRE32
6330 callout_block->subject = (PCRE_SPTR32)begin;
6331 #endif
6333 /* Convert and copy the JIT offset vector to the offset_vector array. */
6334 callout_block->capture_top = 0;
6335 callout_block->offset_vector = offset_vector;
6336 for (i = 2; i < offset_count; i += 2)
6338 offset_vector[i] = jit_ovector[i] - begin;
6339 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6340 if (jit_ovector[i] >= begin)
6341 callout_block->capture_top = i;
6344 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6345 if (offset_count > 0)
6346 offset_vector[0] = -1;
6347 if (offset_count > 1)
6348 offset_vector[1] = -1;
6349 return (*PUBL(callout))(callout_block);
6352 /* Aligning to 8 byte. */
6353 #define CALLOUT_ARG_SIZE \
6354 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6356 #define CALLOUT_ARG_OFFSET(arg) \
6357 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6359 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6361 DEFINE_COMPILER;
6362 backtrack_common *backtrack;
6364 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6366 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6369 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6370 SLJIT_ASSERT(common->capture_last_ptr != 0);
6371 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6372 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6374 /* These pointer sized fields temporarly stores internal variables. */
6375 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6379 if (common->mark_ptr != 0)
6380 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6381 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6382 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6385 /* Needed to save important temporary registers. */
6386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6387 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6388 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6389 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6390 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6391 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6392 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6394 /* Check return value. */
6395 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6396 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6397 if (common->forced_quit_label == NULL)
6398 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6399 else
6400 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6401 return cc + 2 + 2 * LINK_SIZE;
6404 #undef CALLOUT_ARG_SIZE
6405 #undef CALLOUT_ARG_OFFSET
6407 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6409 DEFINE_COMPILER;
6410 int framesize;
6411 int extrasize;
6412 BOOL needs_control_head;
6413 int private_data_ptr;
6414 backtrack_common altbacktrack;
6415 pcre_uchar *ccbegin;
6416 pcre_uchar opcode;
6417 pcre_uchar bra = OP_BRA;
6418 jump_list *tmp = NULL;
6419 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6420 jump_list **found;
6421 /* Saving previous accept variables. */
6422 BOOL save_local_exit = common->local_exit;
6423 BOOL save_positive_assert = common->positive_assert;
6424 then_trap_backtrack *save_then_trap = common->then_trap;
6425 struct sljit_label *save_quit_label = common->quit_label;
6426 struct sljit_label *save_accept_label = common->accept_label;
6427 jump_list *save_quit = common->quit;
6428 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6429 jump_list *save_accept = common->accept;
6430 struct sljit_jump *jump;
6431 struct sljit_jump *brajump = NULL;
6433 /* Assert captures then. */
6434 common->then_trap = NULL;
6436 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6438 SLJIT_ASSERT(!conditional);
6439 bra = *cc;
6440 cc++;
6442 private_data_ptr = PRIVATE_DATA(cc);
6443 SLJIT_ASSERT(private_data_ptr != 0);
6444 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6445 backtrack->framesize = framesize;
6446 backtrack->private_data_ptr = private_data_ptr;
6447 opcode = *cc;
6448 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6449 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6450 ccbegin = cc;
6451 cc += GET(cc, 1);
6453 if (bra == OP_BRAMINZERO)
6455 /* This is a braminzero backtrack path. */
6456 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6457 free_stack(common, 1);
6458 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6461 if (framesize < 0)
6463 extrasize = needs_control_head ? 2 : 1;
6464 if (framesize == no_frame)
6465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6466 allocate_stack(common, extrasize);
6467 if (needs_control_head)
6468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6470 if (needs_control_head)
6472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6476 else
6478 extrasize = needs_control_head ? 3 : 2;
6479 allocate_stack(common, framesize + extrasize);
6480 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6481 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6483 if (needs_control_head)
6484 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6486 if (needs_control_head)
6488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6492 else
6493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6494 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6497 memset(&altbacktrack, 0, sizeof(backtrack_common));
6498 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6500 /* Negative assert is stronger than positive assert. */
6501 common->local_exit = TRUE;
6502 common->quit_label = NULL;
6503 common->quit = NULL;
6504 common->positive_assert = FALSE;
6506 else
6507 common->positive_assert = TRUE;
6508 common->positive_assert_quit = NULL;
6510 while (1)
6512 common->accept_label = NULL;
6513 common->accept = NULL;
6514 altbacktrack.top = NULL;
6515 altbacktrack.topbacktracks = NULL;
6517 if (*ccbegin == OP_ALT)
6518 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6520 altbacktrack.cc = ccbegin;
6521 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6522 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6524 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6526 common->local_exit = save_local_exit;
6527 common->quit_label = save_quit_label;
6528 common->quit = save_quit;
6530 common->positive_assert = save_positive_assert;
6531 common->then_trap = save_then_trap;
6532 common->accept_label = save_accept_label;
6533 common->positive_assert_quit = save_positive_assert_quit;
6534 common->accept = save_accept;
6535 return NULL;
6537 common->accept_label = LABEL();
6538 if (common->accept != NULL)
6539 set_jumps(common->accept, common->accept_label);
6541 /* Reset stack. */
6542 if (framesize < 0)
6544 if (framesize == no_frame)
6545 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6546 else
6547 free_stack(common, extrasize);
6548 if (needs_control_head)
6549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6551 else
6553 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6555 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6556 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6557 if (needs_control_head)
6558 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6560 else
6562 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6563 if (needs_control_head)
6564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6565 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6569 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6571 /* We know that STR_PTR was stored on the top of the stack. */
6572 if (conditional)
6573 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6574 else if (bra == OP_BRAZERO)
6576 if (framesize < 0)
6577 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6578 else
6580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6581 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6584 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6585 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6587 else if (framesize >= 0)
6589 /* For OP_BRA and OP_BRAMINZERO. */
6590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6593 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6595 compile_backtrackingpath(common, altbacktrack.top);
6596 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6598 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6600 common->local_exit = save_local_exit;
6601 common->quit_label = save_quit_label;
6602 common->quit = save_quit;
6604 common->positive_assert = save_positive_assert;
6605 common->then_trap = save_then_trap;
6606 common->accept_label = save_accept_label;
6607 common->positive_assert_quit = save_positive_assert_quit;
6608 common->accept = save_accept;
6609 return NULL;
6611 set_jumps(altbacktrack.topbacktracks, LABEL());
6613 if (*cc != OP_ALT)
6614 break;
6616 ccbegin = cc;
6617 cc += GET(cc, 1);
6620 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6622 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6623 /* Makes the check less complicated below. */
6624 common->positive_assert_quit = common->quit;
6627 /* None of them matched. */
6628 if (common->positive_assert_quit != NULL)
6630 jump = JUMP(SLJIT_JUMP);
6631 set_jumps(common->positive_assert_quit, LABEL());
6632 SLJIT_ASSERT(framesize != no_stack);
6633 if (framesize < 0)
6634 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6635 else
6637 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6638 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6639 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6641 JUMPHERE(jump);
6644 if (needs_control_head)
6645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6647 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6649 /* Assert is failed. */
6650 if (conditional || bra == OP_BRAZERO)
6651 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6653 if (framesize < 0)
6655 /* The topmost item should be 0. */
6656 if (bra == OP_BRAZERO)
6658 if (extrasize == 2)
6659 free_stack(common, 1);
6660 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6662 else
6663 free_stack(common, extrasize);
6665 else
6667 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6668 /* The topmost item should be 0. */
6669 if (bra == OP_BRAZERO)
6671 free_stack(common, framesize + extrasize - 1);
6672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6674 else
6675 free_stack(common, framesize + extrasize);
6676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6678 jump = JUMP(SLJIT_JUMP);
6679 if (bra != OP_BRAZERO)
6680 add_jump(compiler, target, jump);
6682 /* Assert is successful. */
6683 set_jumps(tmp, LABEL());
6684 if (framesize < 0)
6686 /* We know that STR_PTR was stored on the top of the stack. */
6687 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6688 /* Keep the STR_PTR on the top of the stack. */
6689 if (bra == OP_BRAZERO)
6691 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6692 if (extrasize == 2)
6693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6695 else if (bra == OP_BRAMINZERO)
6697 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6701 else
6703 if (bra == OP_BRA)
6705 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6706 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6707 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6709 else
6711 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6712 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6713 if (extrasize == 2)
6715 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6716 if (bra == OP_BRAMINZERO)
6717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6719 else
6721 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6727 if (bra == OP_BRAZERO)
6729 backtrack->matchingpath = LABEL();
6730 SET_LABEL(jump, backtrack->matchingpath);
6732 else if (bra == OP_BRAMINZERO)
6734 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6735 JUMPHERE(brajump);
6736 if (framesize >= 0)
6738 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6739 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6742 set_jumps(backtrack->common.topbacktracks, LABEL());
6745 else
6747 /* AssertNot is successful. */
6748 if (framesize < 0)
6750 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6751 if (bra != OP_BRA)
6753 if (extrasize == 2)
6754 free_stack(common, 1);
6755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6757 else
6758 free_stack(common, extrasize);
6760 else
6762 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6763 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6764 /* The topmost item should be 0. */
6765 if (bra != OP_BRA)
6767 free_stack(common, framesize + extrasize - 1);
6768 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6770 else
6771 free_stack(common, framesize + extrasize);
6772 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6775 if (bra == OP_BRAZERO)
6776 backtrack->matchingpath = LABEL();
6777 else if (bra == OP_BRAMINZERO)
6779 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6780 JUMPHERE(brajump);
6783 if (bra != OP_BRA)
6785 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6786 set_jumps(backtrack->common.topbacktracks, LABEL());
6787 backtrack->common.topbacktracks = NULL;
6791 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6793 common->local_exit = save_local_exit;
6794 common->quit_label = save_quit_label;
6795 common->quit = save_quit;
6797 common->positive_assert = save_positive_assert;
6798 common->then_trap = save_then_trap;
6799 common->accept_label = save_accept_label;
6800 common->positive_assert_quit = save_positive_assert_quit;
6801 common->accept = save_accept;
6802 return cc + 1 + LINK_SIZE;
6805 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6807 DEFINE_COMPILER;
6808 int stacksize;
6810 if (framesize < 0)
6812 if (framesize == no_frame)
6813 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6814 else
6816 stacksize = needs_control_head ? 1 : 0;
6817 if (ket != OP_KET || has_alternatives)
6818 stacksize++;
6819 free_stack(common, stacksize);
6822 if (needs_control_head)
6823 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6825 /* TMP2 which is set here used by OP_KETRMAX below. */
6826 if (ket == OP_KETRMAX)
6827 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6828 else if (ket == OP_KETRMIN)
6830 /* Move the STR_PTR to the private_data_ptr. */
6831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6834 else
6836 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6837 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6838 if (needs_control_head)
6839 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6841 if (ket == OP_KETRMAX)
6843 /* TMP2 which is set here used by OP_KETRMAX below. */
6844 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6847 if (needs_control_head)
6848 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6851 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6853 DEFINE_COMPILER;
6855 if (common->capture_last_ptr != 0)
6857 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6858 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6860 stacksize++;
6862 if (common->optimized_cbracket[offset >> 1] == 0)
6864 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6865 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6866 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6867 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6868 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6869 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6871 stacksize += 2;
6873 return stacksize;
6877 Handling bracketed expressions is probably the most complex part.
6879 Stack layout naming characters:
6880 S - Push the current STR_PTR
6881 0 - Push a 0 (NULL)
6882 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6883 before the next alternative. Not pushed if there are no alternatives.
6884 M - Any values pushed by the current alternative. Can be empty, or anything.
6885 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6886 L - Push the previous local (pointed by localptr) to the stack
6887 () - optional values stored on the stack
6888 ()* - optional, can be stored multiple times
6890 The following list shows the regular expression templates, their PCRE byte codes
6891 and stack layout supported by pcre-sljit.
6893 (?:) OP_BRA | OP_KET A M
6894 () OP_CBRA | OP_KET C M
6895 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6896 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6897 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6898 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6899 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6900 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6901 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6902 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6903 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6904 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6905 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6906 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6907 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6908 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6909 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6910 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6911 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6912 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6913 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6914 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6917 Stack layout naming characters:
6918 A - Push the alternative index (starting from 0) on the stack.
6919 Not pushed if there are no alternatives.
6920 M - Any values pushed by the current alternative. Can be empty, or anything.
6922 The next list shows the possible content of a bracket:
6923 (|) OP_*BRA | OP_ALT ... M A
6924 (?()|) OP_*COND | OP_ALT M A
6925 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6926 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6927 Or nothing, if trace is unnecessary
6930 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6932 DEFINE_COMPILER;
6933 backtrack_common *backtrack;
6934 pcre_uchar opcode;
6935 int private_data_ptr = 0;
6936 int offset = 0;
6937 int i, stacksize;
6938 int repeat_ptr = 0, repeat_length = 0;
6939 int repeat_type = 0, repeat_count = 0;
6940 pcre_uchar *ccbegin;
6941 pcre_uchar *matchingpath;
6942 pcre_uchar *slot;
6943 pcre_uchar bra = OP_BRA;
6944 pcre_uchar ket;
6945 assert_backtrack *assert;
6946 BOOL has_alternatives;
6947 BOOL needs_control_head = FALSE;
6948 struct sljit_jump *jump;
6949 struct sljit_jump *skip;
6950 struct sljit_label *rmax_label = NULL;
6951 struct sljit_jump *braminzero = NULL;
6953 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6955 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6957 bra = *cc;
6958 cc++;
6959 opcode = *cc;
6962 opcode = *cc;
6963 ccbegin = cc;
6964 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6965 ket = *matchingpath;
6966 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6968 repeat_ptr = PRIVATE_DATA(matchingpath);
6969 repeat_length = PRIVATE_DATA(matchingpath + 1);
6970 repeat_type = PRIVATE_DATA(matchingpath + 2);
6971 repeat_count = PRIVATE_DATA(matchingpath + 3);
6972 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6973 if (repeat_type == OP_UPTO)
6974 ket = OP_KETRMAX;
6975 if (repeat_type == OP_MINUPTO)
6976 ket = OP_KETRMIN;
6979 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6981 /* Drop this bracket_backtrack. */
6982 parent->top = backtrack->prev;
6983 return matchingpath + 1 + LINK_SIZE + repeat_length;
6986 matchingpath = ccbegin + 1 + LINK_SIZE;
6987 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6988 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6989 cc += GET(cc, 1);
6991 has_alternatives = *cc == OP_ALT;
6992 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6993 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6995 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6996 opcode = OP_SCOND;
6997 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6998 opcode = OP_ONCE;
7000 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7002 /* Capturing brackets has a pre-allocated space. */
7003 offset = GET2(ccbegin, 1 + LINK_SIZE);
7004 if (common->optimized_cbracket[offset] == 0)
7006 private_data_ptr = OVECTOR_PRIV(offset);
7007 offset <<= 1;
7009 else
7011 offset <<= 1;
7012 private_data_ptr = OVECTOR(offset);
7014 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7015 matchingpath += IMM2_SIZE;
7017 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7019 /* Other brackets simply allocate the next entry. */
7020 private_data_ptr = PRIVATE_DATA(ccbegin);
7021 SLJIT_ASSERT(private_data_ptr != 0);
7022 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7023 if (opcode == OP_ONCE)
7024 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7027 /* Instructions before the first alternative. */
7028 stacksize = 0;
7029 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7030 stacksize++;
7031 if (bra == OP_BRAZERO)
7032 stacksize++;
7034 if (stacksize > 0)
7035 allocate_stack(common, stacksize);
7037 stacksize = 0;
7038 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7040 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7041 stacksize++;
7044 if (bra == OP_BRAZERO)
7045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7047 if (bra == OP_BRAMINZERO)
7049 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7050 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7051 if (ket != OP_KETRMIN)
7053 free_stack(common, 1);
7054 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7056 else
7058 if (opcode == OP_ONCE || opcode >= OP_SBRA)
7060 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7061 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7062 /* Nothing stored during the first run. */
7063 skip = JUMP(SLJIT_JUMP);
7064 JUMPHERE(jump);
7065 /* Checking zero-length iteration. */
7066 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7068 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7069 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7071 else
7073 /* Except when the whole stack frame must be saved. */
7074 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7075 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7077 JUMPHERE(skip);
7079 else
7081 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7082 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7083 JUMPHERE(jump);
7088 if (repeat_type != 0)
7090 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
7091 if (repeat_type == OP_EXACT)
7092 rmax_label = LABEL();
7095 if (ket == OP_KETRMIN)
7096 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7098 if (ket == OP_KETRMAX)
7100 rmax_label = LABEL();
7101 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7102 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7105 /* Handling capturing brackets and alternatives. */
7106 if (opcode == OP_ONCE)
7108 stacksize = 0;
7109 if (needs_control_head)
7111 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7112 stacksize++;
7115 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7117 /* Neither capturing brackets nor recursions are found in the block. */
7118 if (ket == OP_KETRMIN)
7120 stacksize += 2;
7121 if (!needs_control_head)
7122 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7124 else
7126 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7127 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
7128 if (ket == OP_KETRMAX || has_alternatives)
7129 stacksize++;
7132 if (stacksize > 0)
7133 allocate_stack(common, stacksize);
7135 stacksize = 0;
7136 if (needs_control_head)
7138 stacksize++;
7139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7142 if (ket == OP_KETRMIN)
7144 if (needs_control_head)
7145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7146 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7147 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7148 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
7149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7151 else if (ket == OP_KETRMAX || has_alternatives)
7152 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7154 else
7156 if (ket != OP_KET || has_alternatives)
7157 stacksize++;
7159 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
7160 allocate_stack(common, stacksize);
7162 if (needs_control_head)
7163 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7166 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7168 stacksize = needs_control_head ? 1 : 0;
7169 if (ket != OP_KET || has_alternatives)
7171 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
7173 stacksize++;
7174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7176 else
7178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
7179 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7181 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
7184 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
7186 /* Saving the previous values. */
7187 if (common->optimized_cbracket[offset >> 1] != 0)
7189 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
7190 allocate_stack(common, 2);
7191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7192 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
7193 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
7194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7197 else
7199 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7200 allocate_stack(common, 1);
7201 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
7202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7205 else if (opcode == OP_SBRA || opcode == OP_SCOND)
7207 /* Saving the previous value. */
7208 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7209 allocate_stack(common, 1);
7210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
7211 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7213 else if (has_alternatives)
7215 /* Pushing the starting string pointer. */
7216 allocate_stack(common, 1);
7217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7220 /* Generating code for the first alternative. */
7221 if (opcode == OP_COND || opcode == OP_SCOND)
7223 if (*matchingpath == OP_CREF)
7225 SLJIT_ASSERT(has_alternatives);
7226 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
7227 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
7228 matchingpath += 1 + IMM2_SIZE;
7230 else if (*matchingpath == OP_DNCREF)
7232 SLJIT_ASSERT(has_alternatives);
7234 i = GET2(matchingpath, 1 + IMM2_SIZE);
7235 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7236 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
7238 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7239 slot += common->name_entry_size;
7240 i--;
7241 while (i-- > 0)
7243 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7244 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
7245 slot += common->name_entry_size;
7247 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7248 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
7249 matchingpath += 1 + 2 * IMM2_SIZE;
7251 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
7253 /* Never has other case. */
7254 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
7255 SLJIT_ASSERT(!has_alternatives);
7257 if (*matchingpath == OP_RREF)
7259 stacksize = GET2(matchingpath, 1);
7260 if (common->currententry == NULL)
7261 stacksize = 0;
7262 else if (stacksize == RREF_ANY)
7263 stacksize = 1;
7264 else if (common->currententry->start == 0)
7265 stacksize = stacksize == 0;
7266 else
7267 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7269 if (stacksize != 0)
7270 matchingpath += 1 + IMM2_SIZE;
7272 else
7274 if (common->currententry == NULL || common->currententry->start == 0)
7275 stacksize = 0;
7276 else
7278 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
7279 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7280 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7281 while (stacksize > 0)
7283 if ((int)GET2(slot, 0) == i)
7284 break;
7285 slot += common->name_entry_size;
7286 stacksize--;
7290 if (stacksize != 0)
7291 matchingpath += 1 + 2 * IMM2_SIZE;
7294 /* The stacksize == 0 is a common "else" case. */
7295 if (stacksize == 0)
7297 if (*cc == OP_ALT)
7299 matchingpath = cc + 1 + LINK_SIZE;
7300 cc += GET(cc, 1);
7302 else
7303 matchingpath = cc;
7306 else
7308 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
7309 /* Similar code as PUSH_BACKTRACK macro. */
7310 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
7311 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7312 return NULL;
7313 memset(assert, 0, sizeof(assert_backtrack));
7314 assert->common.cc = matchingpath;
7315 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
7316 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
7320 compile_matchingpath(common, matchingpath, cc, backtrack);
7321 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7322 return NULL;
7324 if (opcode == OP_ONCE)
7325 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
7327 stacksize = 0;
7328 if (repeat_type == OP_MINUPTO)
7330 /* We need to preserve the counter. TMP2 will be used below. */
7331 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
7332 stacksize++;
7334 if (ket != OP_KET || bra != OP_BRA)
7335 stacksize++;
7336 if (offset != 0)
7338 if (common->capture_last_ptr != 0)
7339 stacksize++;
7340 if (common->optimized_cbracket[offset >> 1] == 0)
7341 stacksize += 2;
7343 if (has_alternatives && opcode != OP_ONCE)
7344 stacksize++;
7346 if (stacksize > 0)
7347 allocate_stack(common, stacksize);
7349 stacksize = 0;
7350 if (repeat_type == OP_MINUPTO)
7352 /* TMP2 was set above. */
7353 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
7354 stacksize++;
7357 if (ket != OP_KET || bra != OP_BRA)
7359 if (ket != OP_KET)
7360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7361 else
7362 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7363 stacksize++;
7366 if (offset != 0)
7367 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
7369 if (has_alternatives)
7371 if (opcode != OP_ONCE)
7372 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7373 if (ket != OP_KETRMAX)
7374 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7377 /* Must be after the matchingpath label. */
7378 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
7380 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
7381 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
7384 if (ket == OP_KETRMAX)
7386 if (repeat_type != 0)
7388 if (has_alternatives)
7389 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
7391 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
7392 /* Drop STR_PTR for greedy plus quantifier. */
7393 if (opcode != OP_ONCE)
7394 free_stack(common, 1);
7396 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
7398 if (has_alternatives)
7399 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7400 /* Checking zero-length iteration. */
7401 if (opcode != OP_ONCE)
7403 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
7404 /* Drop STR_PTR for greedy plus quantifier. */
7405 if (bra != OP_BRAZERO)
7406 free_stack(common, 1);
7408 else
7409 /* TMP2 must contain the starting STR_PTR. */
7410 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
7412 else
7413 JUMPTO(SLJIT_JUMP, rmax_label);
7414 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7417 if (repeat_type == OP_EXACT)
7419 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
7420 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
7422 else if (repeat_type == OP_UPTO)
7424 /* We need to preserve the counter. */
7425 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
7426 allocate_stack(common, 1);
7427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7430 if (bra == OP_BRAZERO)
7431 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
7433 if (bra == OP_BRAMINZERO)
7435 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
7436 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
7437 if (braminzero != NULL)
7439 JUMPHERE(braminzero);
7440 /* We need to release the end pointer to perform the
7441 backtrack for the zero-length iteration. When
7442 framesize is < 0, OP_ONCE will do the release itself. */
7443 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
7445 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7446 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7448 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
7449 free_stack(common, 1);
7451 /* Continue to the normal backtrack. */
7454 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
7455 count_match(common);
7457 /* Skip the other alternatives. */
7458 while (*cc == OP_ALT)
7459 cc += GET(cc, 1);
7460 cc += 1 + LINK_SIZE;
7462 /* Temporarily encoding the needs_control_head in framesize. */
7463 if (opcode == OP_ONCE)
7464 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
7465 return cc + repeat_length;
7468 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7470 DEFINE_COMPILER;
7471 backtrack_common *backtrack;
7472 pcre_uchar opcode;
7473 int private_data_ptr;
7474 int cbraprivptr = 0;
7475 BOOL needs_control_head;
7476 int framesize;
7477 int stacksize;
7478 int offset = 0;
7479 BOOL zero = FALSE;
7480 pcre_uchar *ccbegin = NULL;
7481 int stack; /* Also contains the offset of control head. */
7482 struct sljit_label *loop = NULL;
7483 struct jump_list *emptymatch = NULL;
7485 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
7486 if (*cc == OP_BRAPOSZERO)
7488 zero = TRUE;
7489 cc++;
7492 opcode = *cc;
7493 private_data_ptr = PRIVATE_DATA(cc);
7494 SLJIT_ASSERT(private_data_ptr != 0);
7495 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
7496 switch(opcode)
7498 case OP_BRAPOS:
7499 case OP_SBRAPOS:
7500 ccbegin = cc + 1 + LINK_SIZE;
7501 break;
7503 case OP_CBRAPOS:
7504 case OP_SCBRAPOS:
7505 offset = GET2(cc, 1 + LINK_SIZE);
7506 /* This case cannot be optimized in the same was as
7507 normal capturing brackets. */
7508 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
7509 cbraprivptr = OVECTOR_PRIV(offset);
7510 offset <<= 1;
7511 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
7512 break;
7514 default:
7515 SLJIT_ASSERT_STOP();
7516 break;
7519 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7520 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
7521 if (framesize < 0)
7523 if (offset != 0)
7525 stacksize = 2;
7526 if (common->capture_last_ptr != 0)
7527 stacksize++;
7529 else
7530 stacksize = 1;
7532 if (needs_control_head)
7533 stacksize++;
7534 if (!zero)
7535 stacksize++;
7537 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7538 allocate_stack(common, stacksize);
7539 if (framesize == no_frame)
7540 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
7542 stack = 0;
7543 if (offset != 0)
7545 stack = 2;
7546 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
7547 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
7548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7549 if (common->capture_last_ptr != 0)
7550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
7551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7552 if (needs_control_head)
7553 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7554 if (common->capture_last_ptr != 0)
7556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
7557 stack = 3;
7560 else
7562 if (needs_control_head)
7563 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7564 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7565 stack = 1;
7568 if (needs_control_head)
7569 stack++;
7570 if (!zero)
7571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
7572 if (needs_control_head)
7574 stack--;
7575 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7578 else
7580 stacksize = framesize + 1;
7581 if (!zero)
7582 stacksize++;
7583 if (needs_control_head)
7584 stacksize++;
7585 if (offset == 0)
7586 stacksize++;
7587 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7589 allocate_stack(common, stacksize);
7590 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7591 if (needs_control_head)
7592 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
7593 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
7595 stack = 0;
7596 if (!zero)
7598 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
7599 stack = 1;
7601 if (needs_control_head)
7603 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7604 stack++;
7606 if (offset == 0)
7608 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
7609 stack++;
7611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
7612 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
7613 stack -= 1 + (offset == 0);
7616 if (offset != 0)
7617 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
7619 loop = LABEL();
7620 while (*cc != OP_KETRPOS)
7622 backtrack->top = NULL;
7623 backtrack->topbacktracks = NULL;
7624 cc += GET(cc, 1);
7626 compile_matchingpath(common, ccbegin, cc, backtrack);
7627 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7628 return NULL;
7630 if (framesize < 0)
7632 if (framesize == no_frame)
7633 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7635 if (offset != 0)
7637 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
7638 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
7639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
7640 if (common->capture_last_ptr != 0)
7641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
7644 else
7646 if (opcode == OP_SBRAPOS)
7647 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7648 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7651 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7652 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
7654 if (!zero)
7655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7657 else
7659 if (offset != 0)
7661 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7662 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
7663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
7664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
7665 if (common->capture_last_ptr != 0)
7666 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
7669 else
7671 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7672 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7673 if (opcode == OP_SBRAPOS)
7674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7675 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
7678 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7679 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
7681 if (!zero)
7683 if (framesize < 0)
7684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7685 else
7686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7690 if (needs_control_head)
7691 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
7693 JUMPTO(SLJIT_JUMP, loop);
7694 flush_stubs(common);
7696 compile_backtrackingpath(common, backtrack->top);
7697 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7698 return NULL;
7699 set_jumps(backtrack->topbacktracks, LABEL());
7701 if (framesize < 0)
7703 if (offset != 0)
7704 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
7705 else
7706 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7708 else
7710 if (offset != 0)
7712 /* Last alternative. */
7713 if (*cc == OP_KETRPOS)
7714 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7715 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
7717 else
7719 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
7720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7724 if (*cc == OP_KETRPOS)
7725 break;
7726 ccbegin = cc + 1 + LINK_SIZE;
7729 /* We don't have to restore the control head in case of a failed match. */
7731 backtrack->topbacktracks = NULL;
7732 if (!zero)
7734 if (framesize < 0)
7735 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
7736 else /* TMP2 is set to [private_data_ptr] above. */
7737 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
7740 /* None of them matched. */
7741 set_jumps(emptymatch, LABEL());
7742 count_match(common);
7743 return cc + 1 + LINK_SIZE;
7746 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7748 int class_len;
7750 *opcode = *cc;
7751 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
7753 cc++;
7754 *type = OP_CHAR;
7756 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7758 cc++;
7759 *type = OP_CHARI;
7760 *opcode -= OP_STARI - OP_STAR;
7762 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7764 cc++;
7765 *type = OP_NOT;
7766 *opcode -= OP_NOTSTAR - OP_STAR;
7768 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7770 cc++;
7771 *type = OP_NOTI;
7772 *opcode -= OP_NOTSTARI - OP_STAR;
7774 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7776 cc++;
7777 *opcode -= OP_TYPESTAR - OP_STAR;
7778 *type = 0;
7780 else
7782 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7783 *type = *opcode;
7784 cc++;
7785 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
7786 *opcode = cc[class_len - 1];
7787 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7789 *opcode -= OP_CRSTAR - OP_STAR;
7790 if (end != NULL)
7791 *end = cc + class_len;
7793 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7795 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7796 if (end != NULL)
7797 *end = cc + class_len;
7799 else
7801 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7802 *max = GET2(cc, (class_len + IMM2_SIZE));
7803 *min = GET2(cc, class_len);
7805 if (*min == 0)
7807 SLJIT_ASSERT(*max != 0);
7808 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7810 if (*max == *min)
7811 *opcode = OP_EXACT;
7813 if (end != NULL)
7814 *end = cc + class_len + 2 * IMM2_SIZE;
7816 return cc;
7819 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7821 *max = GET2(cc, 0);
7822 cc += IMM2_SIZE;
7825 if (*type == 0)
7827 *type = *cc;
7828 if (end != NULL)
7829 *end = next_opcode(common, cc);
7830 cc++;
7831 return cc;
7834 if (end != NULL)
7836 *end = cc + 1;
7837 #ifdef SUPPORT_UTF
7838 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
7839 #endif
7841 return cc;
7844 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7846 DEFINE_COMPILER;
7847 backtrack_common *backtrack;
7848 pcre_uchar opcode;
7849 pcre_uchar type;
7850 int max = -1, min = -1;
7851 pcre_uchar* end;
7852 jump_list *nomatch = NULL;
7853 struct sljit_jump *jump = NULL;
7854 struct sljit_label *label;
7855 int private_data_ptr = PRIVATE_DATA(cc);
7856 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
7857 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7858 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7859 int tmp_base, tmp_offset;
7861 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7863 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
7865 switch(type)
7867 case OP_NOT_DIGIT:
7868 case OP_DIGIT:
7869 case OP_NOT_WHITESPACE:
7870 case OP_WHITESPACE:
7871 case OP_NOT_WORDCHAR:
7872 case OP_WORDCHAR:
7873 case OP_ANY:
7874 case OP_ALLANY:
7875 case OP_ANYBYTE:
7876 case OP_ANYNL:
7877 case OP_NOT_HSPACE:
7878 case OP_HSPACE:
7879 case OP_NOT_VSPACE:
7880 case OP_VSPACE:
7881 case OP_CHAR:
7882 case OP_CHARI:
7883 case OP_NOT:
7884 case OP_NOTI:
7885 case OP_CLASS:
7886 case OP_NCLASS:
7887 tmp_base = TMP3;
7888 tmp_offset = 0;
7889 break;
7891 default:
7892 SLJIT_ASSERT_STOP();
7893 /* Fall through. */
7895 case OP_EXTUNI:
7896 case OP_XCLASS:
7897 case OP_NOTPROP:
7898 case OP_PROP:
7899 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
7900 tmp_offset = POSSESSIVE0;
7901 break;
7904 switch(opcode)
7906 case OP_STAR:
7907 case OP_PLUS:
7908 case OP_UPTO:
7909 case OP_CRRANGE:
7910 if (type == OP_ANYNL || type == OP_EXTUNI)
7912 SLJIT_ASSERT(private_data_ptr == 0);
7913 if (opcode == OP_STAR || opcode == OP_UPTO)
7915 allocate_stack(common, 2);
7916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7919 else
7921 allocate_stack(common, 1);
7922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
7925 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7926 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
7928 label = LABEL();
7929 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7930 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
7933 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7934 if (opcode == OP_CRRANGE && min > 0)
7935 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
7936 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
7937 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7938 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
7941 /* We cannot use TMP3 because of this allocate_stack. */
7942 allocate_stack(common, 1);
7943 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7944 JUMPTO(SLJIT_JUMP, label);
7945 if (jump != NULL)
7946 JUMPHERE(jump);
7948 else
7950 if (opcode == OP_PLUS)
7951 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7952 if (private_data_ptr == 0)
7953 allocate_stack(common, 2);
7954 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7955 if (opcode <= OP_PLUS)
7956 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
7957 else
7958 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
7959 label = LABEL();
7960 compile_char1_matchingpath(common, type, cc, &nomatch);
7961 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7962 if (opcode <= OP_PLUS)
7963 JUMPTO(SLJIT_JUMP, label);
7964 else if (opcode == OP_CRRANGE && max == 0)
7966 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
7967 JUMPTO(SLJIT_JUMP, label);
7969 else
7971 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7972 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7973 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
7974 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
7976 set_jumps(nomatch, LABEL());
7977 if (opcode == OP_CRRANGE)
7978 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1));
7979 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7981 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7982 break;
7984 case OP_MINSTAR:
7985 case OP_MINPLUS:
7986 if (opcode == OP_MINPLUS)
7987 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7988 if (private_data_ptr == 0)
7989 allocate_stack(common, 1);
7990 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7991 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
7992 break;
7994 case OP_MINUPTO:
7995 case OP_CRMINRANGE:
7996 if (private_data_ptr == 0)
7997 allocate_stack(common, 2);
7998 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7999 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
8000 if (opcode == OP_CRMINRANGE)
8001 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8002 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8003 break;
8005 case OP_QUERY:
8006 case OP_MINQUERY:
8007 if (private_data_ptr == 0)
8008 allocate_stack(common, 1);
8009 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8010 if (opcode == OP_QUERY)
8011 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8012 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
8013 break;
8015 case OP_EXACT:
8016 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
8017 label = LABEL();
8018 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8019 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8020 JUMPTO(SLJIT_C_NOT_ZERO, label);
8021 break;
8023 case OP_POSSTAR:
8024 case OP_POSPLUS:
8025 case OP_POSUPTO:
8026 if (opcode == OP_POSPLUS)
8027 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8028 if (opcode == OP_POSUPTO)
8029 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max);
8030 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8031 label = LABEL();
8032 compile_char1_matchingpath(common, type, cc, &nomatch);
8033 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8034 if (opcode != OP_POSUPTO)
8035 JUMPTO(SLJIT_JUMP, label);
8036 else
8038 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
8039 JUMPTO(SLJIT_C_NOT_ZERO, label);
8041 set_jumps(nomatch, LABEL());
8042 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8043 break;
8045 case OP_POSQUERY:
8046 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8047 compile_char1_matchingpath(common, type, cc, &nomatch);
8048 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8049 set_jumps(nomatch, LABEL());
8050 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8051 break;
8053 case OP_CRPOSRANGE:
8054 /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
8055 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
8056 label = LABEL();
8057 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8058 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8059 JUMPTO(SLJIT_C_NOT_ZERO, label);
8061 if (max != 0)
8063 SLJIT_ASSERT(max - min > 0);
8064 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
8066 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8067 label = LABEL();
8068 compile_char1_matchingpath(common, type, cc, &nomatch);
8069 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8070 if (max == 0)
8071 JUMPTO(SLJIT_JUMP, label);
8072 else
8074 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
8075 JUMPTO(SLJIT_C_NOT_ZERO, label);
8077 set_jumps(nomatch, LABEL());
8078 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8079 break;
8081 default:
8082 SLJIT_ASSERT_STOP();
8083 break;
8086 count_match(common);
8087 return end;
8090 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8092 DEFINE_COMPILER;
8093 backtrack_common *backtrack;
8095 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8097 if (*cc == OP_FAIL)
8099 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8100 return cc + 1;
8103 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8105 /* No need to check notempty conditions. */
8106 if (common->accept_label == NULL)
8107 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8108 else
8109 JUMPTO(SLJIT_JUMP, common->accept_label);
8110 return cc + 1;
8113 if (common->accept_label == NULL)
8114 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
8115 else
8116 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
8117 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8118 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
8119 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8120 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
8121 if (common->accept_label == NULL)
8122 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8123 else
8124 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
8125 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8126 if (common->accept_label == NULL)
8127 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
8128 else
8129 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
8130 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8131 return cc + 1;
8134 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
8136 DEFINE_COMPILER;
8137 int offset = GET2(cc, 1);
8138 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
8140 /* Data will be discarded anyway... */
8141 if (common->currententry != NULL)
8142 return cc + 1 + IMM2_SIZE;
8144 if (!optimized_cbracket)
8145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
8146 offset <<= 1;
8147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
8148 if (!optimized_cbracket)
8149 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
8150 return cc + 1 + IMM2_SIZE;
8153 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8155 DEFINE_COMPILER;
8156 backtrack_common *backtrack;
8157 pcre_uchar opcode = *cc;
8158 pcre_uchar *ccend = cc + 1;
8160 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
8161 ccend += 2 + cc[1];
8163 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8165 if (opcode == OP_SKIP)
8167 allocate_stack(common, 1);
8168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8169 return ccend;
8172 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
8174 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8175 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
8177 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8180 return ccend;
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
   records created in this file point their common.cc field at it. */
8183 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
8185 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8187 DEFINE_COMPILER;
8188 backtrack_common *backtrack;
8189 BOOL needs_control_head;
8190 int size;
8192 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8193 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
8194 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8195 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
8196 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
8198 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8199 size = 3 + (size < 0 ? 0 : size);
8201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
8202 allocate_stack(common, size);
8203 if (size > 3)
8204 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
8205 else
8206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
8207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
8208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
8209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
8211 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8212 if (size >= 0)
8213 init_frame(common, cc, ccend, size - 1, 0, FALSE);
/* Emits the matching-path code for the opcodes in the range cc .. ccend.

   common  shared JIT compiler state.
   cc      first opcode to compile.
   ccend   end of the range; asserted to point at OP_END or at an opcode
           between OP_ALT and OP_KETRPOS.
   parent  backtrack record that the items compiled here are attached to.

   Each opcode is dispatched to the helper for its class, and cc is advanced
   to whatever that helper returns. The function returns early when a helper
   returns NULL or when an unknown opcode is met. When common->has_then is set
   and common->then_offsets is non-zero for this position, a then-trap
   backtrack record is pushed both before and after the range. */
8216 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8218 DEFINE_COMPILER;
8219 backtrack_common *backtrack;
8220 BOOL has_then_trap = FALSE;
8221 then_trap_backtrack *save_then_trap = NULL;
8223 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
/* Install a then-trap first, remembering the previous common->then_trap so
   that it can be restored once the range has been compiled. */
8225 if (common->has_then && common->then_offsets[cc - common->start] != 0)
8227 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
8228 has_then_trap = TRUE;
8229 save_then_trap = common->then_trap;
8230 /* Tail item on backtrack. */
8231 compile_then_trap_matchingpath(common, cc, ccend, parent);
8234 while (cc < ccend)
8236 switch(*cc)
/* Opcodes handled by compile_char1_matchingpath(). Failure jumps go to the
   nextbacktracks list of parent->top when there is one, otherwise to
   parent->topbacktracks. */
8238 case OP_SOD:
8239 case OP_SOM:
8240 case OP_NOT_WORD_BOUNDARY:
8241 case OP_WORD_BOUNDARY:
8242 case OP_NOT_DIGIT:
8243 case OP_DIGIT:
8244 case OP_NOT_WHITESPACE:
8245 case OP_WHITESPACE:
8246 case OP_NOT_WORDCHAR:
8247 case OP_WORDCHAR:
8248 case OP_ANY:
8249 case OP_ALLANY:
8250 case OP_ANYBYTE:
8251 case OP_NOTPROP:
8252 case OP_PROP:
8253 case OP_ANYNL:
8254 case OP_NOT_HSPACE:
8255 case OP_HSPACE:
8256 case OP_NOT_VSPACE:
8257 case OP_VSPACE:
8258 case OP_EXTUNI:
8259 case OP_EODN:
8260 case OP_EOD:
8261 case OP_CIRC:
8262 case OP_CIRCM:
8263 case OP_DOLL:
8264 case OP_DOLLM:
8265 case OP_NOT:
8266 case OP_NOTI:
8267 case OP_REVERSE:
8268 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8269 break;
/* OP_SET_SOM: the old OVECTOR(0) value is saved on the stack and OVECTOR(0)
   is overwritten with STR_PTR. */
8271 case OP_SET_SOM:
8272 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
8273 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
8274 allocate_stack(common, 1);
8275 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
8276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
8277 cc++;
8278 break;
/* Literal characters: compile_charn_matchingpath() is used in JIT_COMPILE
   mode, compile_char1_matchingpath() in the other modes. */
8280 case OP_CHAR:
8281 case OP_CHARI:
8282 if (common->mode == JIT_COMPILE)
8283 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8284 else
8285 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8286 break;
/* Repeat opcodes, all handled by compile_iterator_matchingpath(). */
8288 case OP_STAR:
8289 case OP_MINSTAR:
8290 case OP_PLUS:
8291 case OP_MINPLUS:
8292 case OP_QUERY:
8293 case OP_MINQUERY:
8294 case OP_UPTO:
8295 case OP_MINUPTO:
8296 case OP_EXACT:
8297 case OP_POSSTAR:
8298 case OP_POSPLUS:
8299 case OP_POSQUERY:
8300 case OP_POSUPTO:
8301 case OP_STARI:
8302 case OP_MINSTARI:
8303 case OP_PLUSI:
8304 case OP_MINPLUSI:
8305 case OP_QUERYI:
8306 case OP_MINQUERYI:
8307 case OP_UPTOI:
8308 case OP_MINUPTOI:
8309 case OP_EXACTI:
8310 case OP_POSSTARI:
8311 case OP_POSPLUSI:
8312 case OP_POSQUERYI:
8313 case OP_POSUPTOI:
8314 case OP_NOTSTAR:
8315 case OP_NOTMINSTAR:
8316 case OP_NOTPLUS:
8317 case OP_NOTMINPLUS:
8318 case OP_NOTQUERY:
8319 case OP_NOTMINQUERY:
8320 case OP_NOTUPTO:
8321 case OP_NOTMINUPTO:
8322 case OP_NOTEXACT:
8323 case OP_NOTPOSSTAR:
8324 case OP_NOTPOSPLUS:
8325 case OP_NOTPOSQUERY:
8326 case OP_NOTPOSUPTO:
8327 case OP_NOTSTARI:
8328 case OP_NOTMINSTARI:
8329 case OP_NOTPLUSI:
8330 case OP_NOTMINPLUSI:
8331 case OP_NOTQUERYI:
8332 case OP_NOTMINQUERYI:
8333 case OP_NOTUPTOI:
8334 case OP_NOTMINUPTOI:
8335 case OP_NOTEXACTI:
8336 case OP_NOTPOSSTARI:
8337 case OP_NOTPOSPLUSI:
8338 case OP_NOTPOSQUERYI:
8339 case OP_NOTPOSUPTOI:
8340 case OP_TYPESTAR:
8341 case OP_TYPEMINSTAR:
8342 case OP_TYPEPLUS:
8343 case OP_TYPEMINPLUS:
8344 case OP_TYPEQUERY:
8345 case OP_TYPEMINQUERY:
8346 case OP_TYPEUPTO:
8347 case OP_TYPEMINUPTO:
8348 case OP_TYPEEXACT:
8349 case OP_TYPEPOSSTAR:
8350 case OP_TYPEPOSPLUS:
8351 case OP_TYPEPOSQUERY:
8352 case OP_TYPEPOSUPTO:
8353 cc = compile_iterator_matchingpath(common, cc, parent);
8354 break;
/* Character classes: the opcode located 32 bytes after the class opcode
   decides whether the class is repeated (iterator path) or matched once. */
8356 case OP_CLASS:
8357 case OP_NCLASS:
8358 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
8359 cc = compile_iterator_matchingpath(common, cc, parent);
8360 else
8361 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8362 break;
8364 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended classes: the opcode at the offset given by GET(cc, 1) is checked
   for a repeat in the same way. */
8365 case OP_XCLASS:
8366 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
8367 cc = compile_iterator_matchingpath(common, cc, parent);
8368 else
8369 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8370 break;
8371 #endif
/* Back references: a following repeat opcode selects the iterator helper,
   otherwise the reference is compiled once and cc is advanced by hand. */
8373 case OP_REF:
8374 case OP_REFI:
8375 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
8376 cc = compile_ref_iterator_matchingpath(common, cc, parent);
8377 else
8379 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8380 cc += 1 + IMM2_SIZE;
8382 break;
/* OP_DNREF / OP_DNREFI carry two IMM2 arguments; compile_dnref_search() is
   emitted before the reference match itself. */
8384 case OP_DNREF:
8385 case OP_DNREFI:
8386 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
8387 cc = compile_ref_iterator_matchingpath(common, cc, parent);
8388 else
8390 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8391 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8392 cc += 1 + 2 * IMM2_SIZE;
8394 break;
8396 case OP_RECURSE:
8397 cc = compile_recurse_matchingpath(common, cc, parent);
8398 break;
8400 case OP_CALLOUT:
8401 cc = compile_callout_matchingpath(common, cc, parent);
8402 break;
/* Assertions get an assert_backtrack record before being compiled. */
8404 case OP_ASSERT:
8405 case OP_ASSERT_NOT:
8406 case OP_ASSERTBACK:
8407 case OP_ASSERTBACK_NOT:
8408 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
8409 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
8410 break;
/* OP_BRAMINZERO: cc is moved to the end of the bracket (the bracket itself is
   not compiled here). One stack slot holding STR_PTR is pushed, or two slots
   (a zero and STR_PTR) when the bracket ends with OP_KETRMIN. */
8412 case OP_BRAMINZERO:
8413 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
8414 cc = bracketend(cc + 1);
8415 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
8417 allocate_stack(common, 1);
8418 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8420 else
8422 allocate_stack(common, 2);
8423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8424 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
8426 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
8427 if (cc[1] > OP_ASSERTBACK_NOT)
8428 count_match(common);
8429 break;
8431 case OP_ONCE:
8432 case OP_ONCE_NC:
8433 case OP_BRA:
8434 case OP_CBRA:
8435 case OP_COND:
8436 case OP_SBRA:
8437 case OP_SCBRA:
8438 case OP_SCOND:
8439 cc = compile_bracket_matchingpath(common, cc, parent);
8440 break;
/* OP_BRAZERO: an opcode above OP_ASSERTBACK_NOT is compiled as a bracket,
   anything else as an assertion. */
8442 case OP_BRAZERO:
8443 if (cc[1] > OP_ASSERTBACK_NOT)
8444 cc = compile_bracket_matchingpath(common, cc, parent);
8445 else
8447 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
8448 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
8450 break;
8452 case OP_BRAPOS:
8453 case OP_CBRAPOS:
8454 case OP_SBRAPOS:
8455 case OP_SCBRAPOS:
8456 case OP_BRAPOSZERO:
8457 cc = compile_bracketpos_matchingpath(common, cc, parent);
8458 break;
/* OP_MARK: the previous mark pointer is saved on the stack and the new mark
   (cc + 2) is stored both in the mark_ptr local and in jit_arguments. When
   common->has_skip_arg is set, five slots are allocated instead of one and a
   type_mark record is linked into the control head chain. */
8460 case OP_MARK:
8461 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
8462 SLJIT_ASSERT(common->mark_ptr != 0);
8463 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
8464 allocate_stack(common, common->has_skip_arg ? 5 : 1);
8465 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
8467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
8469 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8470 if (common->has_skip_arg)
8472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
8473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
8474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
8475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
8476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
8477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
8479 cc += 1 + 2 + cc[1];
8480 break;
8482 case OP_PRUNE:
8483 case OP_PRUNE_ARG:
8484 case OP_SKIP:
8485 case OP_SKIP_ARG:
8486 case OP_THEN:
8487 case OP_THEN_ARG:
8488 case OP_COMMIT:
8489 cc = compile_control_verb_matchingpath(common, cc, parent);
8490 break;
8492 case OP_FAIL:
8493 case OP_ACCEPT:
8494 case OP_ASSERT_ACCEPT:
8495 cc = compile_fail_accept_matchingpath(common, cc, parent);
8496 break;
8498 case OP_CLOSE:
8499 cc = compile_close_matchingpath(common, cc);
8500 break;
/* OP_SKIPZERO: the following bracket is skipped without emitting code. */
8502 case OP_SKIPZERO:
8503 cc = bracketend(cc + 1);
8504 break;
8506 default:
8507 SLJIT_ASSERT_STOP();
8508 return;
/* A NULL result from a helper aborts the compilation of this range. */
8510 if (cc == NULL)
8511 return;
8514 if (has_then_trap)
8516 /* Head item on backtrack. */
8517 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8518 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8519 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
8520 common->then_trap = save_then_trap;
8522 SLJIT_ASSERT(cc == ccend);
/* The matching-path helper macros are removed from this point on. */
8525 #undef PUSH_BACKTRACK
8526 #undef PUSH_BACKTRACK_NOVALUE
8527 #undef BACKTRACK_AS
/* Calls compile_backtrackingpath() for the given record and returns from the
   enclosing function when the sljit compiler reports an error. It expects
   `common` and `compiler` to be in scope and, because it uses a bare
   `return;`, is meant for functions returning void. */
8529 #define COMPILE_BACKTRACKINGPATH(current) \
8530 do \
8532 compile_backtrackingpath(common, (current)); \
8533 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
8534 return; \
8536 while (0)
/* Casts the in-scope variable `current` to a pointer to the given type. */
8538 #define CURRENT_AS(type) ((type *)current)
/* Emits the backtracking-path code for the iterator recorded in `current`.

   common   shared JIT compiler state.
   current  backtrack record of the iterator; current->cc points at its opcode.

   The opcode, character type and min/max counts are decoded by
   get_iterator_parameters(). The iterator state is addressed through
   base/offset0/offset1: two stack slots when PRIVATE_DATA(cc) is zero,
   otherwise the private data area in the locals. Paths that can try again
   jump back to the matchingpath label stored in the iterator_backtrack
   record. */
8540 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8542 DEFINE_COMPILER;
8543 pcre_uchar *cc = current->cc;
8544 pcre_uchar opcode;
8545 pcre_uchar type;
8546 int max = -1, min = -1;
8547 struct sljit_label *label = NULL;
8548 struct sljit_jump *jump = NULL;
8549 jump_list *jumplist = NULL;
8550 int private_data_ptr = PRIVATE_DATA(cc);
8551 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
8552 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8553 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8555 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
8557 switch(opcode)
/* Greedy iterators. */
8559 case OP_STAR:
8560 case OP_PLUS:
8561 case OP_UPTO:
8562 case OP_CRRANGE:
/* OP_ANYNL and OP_EXTUNI: a saved STR_PTR is popped from the stack and the
   matching path is resumed while the popped value is non-zero. */
8563 if (type == OP_ANYNL || type == OP_EXTUNI)
8565 SLJIT_ASSERT(private_data_ptr == 0);
8566 set_jumps(current->topbacktracks, LABEL());
8567 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8568 free_stack(common, 1);
8569 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8571 else
8573 if (opcode == OP_UPTO)
8574 min = 0;
/* OP_STAR / OP_PLUS compare STR_PTR with the value in the second slot; the
   other opcodes treat the second slot as a counter that is compared with
   min + 1 and then decremented. */
8575 if (opcode <= OP_PLUS)
8577 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8578 jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
8580 else
8582 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8583 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8584 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
8585 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
/* Step back one character, store the new position and retry the matching
   path. */
8587 skip_char_back(common);
8588 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8589 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8590 if (opcode == OP_CRRANGE)
8591 set_jumps(current->topbacktracks, LABEL());
8592 JUMPHERE(jump);
8593 if (private_data_ptr == 0)
8594 free_stack(common, 2);
8595 if (opcode == OP_PLUS)
8596 set_jumps(current->topbacktracks, LABEL());
8598 break;
/* Minimizing star/plus: match one more character from the stored position
   and retry; when that character fails, the iterator is exhausted. */
8600 case OP_MINSTAR:
8601 case OP_MINPLUS:
8602 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8603 compile_char1_matchingpath(common, type, cc, &jumplist);
8604 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8605 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8606 set_jumps(jumplist, LABEL());
8607 if (private_data_ptr == 0)
8608 free_stack(common, 1);
8609 if (opcode == OP_MINPLUS)
8610 set_jumps(current->topbacktracks, LABEL());
8611 break;
/* Minimizing bounded repeats: match one more character, increment the
   counter in the second slot and compare it against the bounds. */
8613 case OP_MINUPTO:
8614 case OP_CRMINRANGE:
8615 if (opcode == OP_CRMINRANGE)
8617 label = LABEL();
8618 set_jumps(current->topbacktracks, label);
8620 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8621 compile_char1_matchingpath(common, type, cc, &jumplist);
8623 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8624 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8625 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8626 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
/* Below min + 1 the loop label is re-entered instead of the matching path. */
8628 if (opcode == OP_CRMINRANGE)
8629 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
/* For OP_CRMINRANGE with max == 0 the retry is unconditional; otherwise it
   is taken only while the counter is below max + 2. */
8631 if (opcode == OP_CRMINRANGE && max == 0)
8632 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8633 else
8634 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
8636 set_jumps(jumplist, LABEL());
8637 if (private_data_ptr == 0)
8638 free_stack(common, 2);
8639 break;
/* OP_QUERY: the stored position is reloaded and the slot is cleared; a
   non-zero position resumes the matching path. */
8641 case OP_QUERY:
8642 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8643 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8644 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8645 jump = JUMP(SLJIT_JUMP);
8646 set_jumps(current->topbacktracks, LABEL());
8647 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8648 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8649 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8650 JUMPHERE(jump);
8651 if (private_data_ptr == 0)
8652 free_stack(common, 1);
8653 break;
/* OP_MINQUERY: a zero stored position skips the retry; otherwise one
   character is matched and the matching path is resumed. */
8655 case OP_MINQUERY:
8656 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8657 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8658 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8659 compile_char1_matchingpath(common, type, cc, &jumplist);
8660 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8661 set_jumps(jumplist, LABEL());
8662 JUMPHERE(jump);
8663 if (private_data_ptr == 0)
8664 free_stack(common, 1);
8665 break;
/* These opcodes only need their pending backtrack jumps bound here. */
8667 case OP_EXACT:
8668 case OP_POSPLUS:
8669 case OP_CRPOSRANGE:
8670 set_jumps(current->topbacktracks, LABEL());
8671 break;
/* Nothing is emitted for these possessive forms. */
8673 case OP_POSSTAR:
8674 case OP_POSQUERY:
8675 case OP_POSUPTO:
8676 break;
8678 default:
8679 SLJIT_ASSERT_STOP();
8680 break;
8684 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8686 DEFINE_COMPILER;
8687 pcre_uchar *cc = current->cc;
8688 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8689 pcre_uchar type;
8691 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8693 if ((type & 0x1) == 0)
8695 /* Maximize case. */
8696 set_jumps(current->topbacktracks, LABEL());
8697 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8698 free_stack(common, 1);
8699 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8700 return;
8703 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8704 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8705 set_jumps(current->topbacktracks, LABEL());
8706 free_stack(common, ref ? 2 : 3);
8709 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8711 DEFINE_COMPILER;
8713 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8714 compile_backtrackingpath(common, current->top);
8715 set_jumps(current->topbacktracks, LABEL());
8716 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8717 return;
8719 if (common->has_set_som && common->mark_ptr != 0)
8721 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8722 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8723 free_stack(common, 2);
8724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
8725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
8727 else if (common->has_set_som || common->mark_ptr != 0)
8729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8730 free_stack(common, 1);
8731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
/* Emits the backtracking-path code for an assertion, optionally preceded by
   OP_BRAZERO.

   common   shared JIT compiler state.
   current  the assert_backtrack record; current->cc points at the assertion
            opcode or at the OP_BRAZERO in front of it (OP_BRAMINZERO is
            asserted not to occur).

   With OP_BRAZERO the saved STR_PTR is read from the stack; a non-zero value
   sends control back to the matchingpath label of the record. When the
   recorded framesize is not negative, a positive assertion (OP_ASSERT /
   OP_ASSERTBACK) additionally restores its frame through the
   common->revertframes routine. */
8735 static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8737 DEFINE_COMPILER;
8738 pcre_uchar *cc = current->cc;
8739 pcre_uchar bra = OP_BRA;
8740 struct sljit_jump *brajump = NULL;
8742 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
/* Remember a leading OP_BRAZERO and step over it. */
8743 if (*cc == OP_BRAZERO)
8745 bra = *cc;
8746 cc++;
8749 if (bra == OP_BRAZERO)
8751 SLJIT_ASSERT(current->topbacktracks == NULL);
8752 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Negative framesize: only the pending jumps have to be bound; with
   OP_BRAZERO the slot is cleared and the matching path is retried when the
   saved STR_PTR was non-zero. */
8755 if (CURRENT_AS(assert_backtrack)->framesize < 0)
8757 set_jumps(current->topbacktracks, LABEL());
8759 if (bra == OP_BRAZERO)
8761 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8762 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
8763 free_stack(common, 1);
8765 return;
8768 if (bra == OP_BRAZERO)
/* Negative assertions are finished after the same clear-and-retry step. */
8770 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
8772 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8773 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
8774 free_stack(common, 1);
8775 return;
/* Positive assertion: a zero saved STR_PTR skips everything emitted below
   (brajump is bound at the end of the function). */
8777 free_stack(common, 1);
8778 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
/* Positive assertions reload STACK_TOP from the private data slot, call the
   revertframes routine and restore the previous private data value from the
   frame. */
8781 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
8783 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr);
8784 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8785 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
8787 set_jumps(current->topbacktracks, LABEL());
8789 else
8790 set_jumps(current->topbacktracks, LABEL());
/* With OP_BRAZERO one stack slot is claimed again by adjusting STACK_TOP
   directly, cleared, and the matching path is retried. */
8792 if (bra == OP_BRAZERO)
8794 /* We know there is enough place on the stack. */
8795 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8796 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8797 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
8798 JUMPHERE(brajump);
8802 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8804 DEFINE_COMPILER;
8805 int opcode, stacksize, alt_count, alt_max;
8806 int offset = 0;
8807 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8808 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8809 pcre_uchar *cc = current->cc;
8810 pcre_uchar *ccbegin;
8811 pcre_uchar *ccprev;
8812 pcre_uchar bra = OP_BRA;
8813 pcre_uchar ket;
8814 assert_backtrack *assert;
8815 BOOL has_alternatives;
8816 BOOL needs_control_head = FALSE;
8817 struct sljit_jump *brazero = NULL;
8818 struct sljit_jump *alt1 = NULL;
8819 struct sljit_jump *alt2 = NULL;
8820 struct sljit_jump *once = NULL;
8821 struct sljit_jump *cond = NULL;
8822 struct sljit_label *rmin_label = NULL;
8823 struct sljit_label *exact_label = NULL;
8825 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8827 bra = *cc;
8828 cc++;
8831 opcode = *cc;
8832 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
8833 ket = *ccbegin;
8834 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
8836 repeat_ptr = PRIVATE_DATA(ccbegin);
8837 repeat_type = PRIVATE_DATA(ccbegin + 2);
8838 repeat_count = PRIVATE_DATA(ccbegin + 3);
8839 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
8840 if (repeat_type == OP_UPTO)
8841 ket = OP_KETRMAX;
8842 if (repeat_type == OP_MINUPTO)
8843 ket = OP_KETRMIN;
8845 ccbegin = cc;
8846 cc += GET(cc, 1);
8847 has_alternatives = *cc == OP_ALT;
8848 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8849 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
8850 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8851 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
8852 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8853 opcode = OP_SCOND;
8854 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8855 opcode = OP_ONCE;
8857 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8859 /* Decoding the needs_control_head in framesize. */
8860 if (opcode == OP_ONCE)
8862 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
8863 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
8866 if (ket != OP_KET && repeat_type != 0)
8868 /* TMP1 is used in OP_KETRMIN below. */
8869 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8870 free_stack(common, 1);
8871 if (repeat_type == OP_UPTO)
8872 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
8873 else
8874 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
8877 if (ket == OP_KETRMAX)
8879 if (bra == OP_BRAZERO)
8881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8882 free_stack(common, 1);
8883 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8886 else if (ket == OP_KETRMIN)
8888 if (bra != OP_BRAMINZERO)
8890 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8891 if (repeat_type != 0)
8893 /* TMP1 was set a few lines above. */
8894 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8895 /* Drop STR_PTR for non-greedy plus quantifier. */
8896 if (opcode != OP_ONCE)
8897 free_stack(common, 1);
8899 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
8901 /* Checking zero-length iteration. */
8902 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
8903 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8904 else
8906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
8907 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8909 /* Drop STR_PTR for non-greedy plus quantifier. */
8910 if (opcode != OP_ONCE)
8911 free_stack(common, 1);
8913 else
8914 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8916 rmin_label = LABEL();
8917 if (repeat_type != 0)
8918 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
8920 else if (bra == OP_BRAZERO)
8922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8923 free_stack(common, 1);
8924 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8926 else if (repeat_type == OP_EXACT)
8928 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
8929 exact_label = LABEL();
8932 if (offset != 0)
8934 if (common->capture_last_ptr != 0)
8936 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
8937 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8938 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8939 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
8940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8941 free_stack(common, 3);
8942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0);
8943 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
8945 else if (common->optimized_cbracket[offset >> 1] == 0)
8947 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8948 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8949 free_stack(common, 2);
8950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
8951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
8955 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
8957 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
8959 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
8960 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8962 once = JUMP(SLJIT_JUMP);
8964 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8966 if (has_alternatives)
8968 /* Always exactly one alternative. */
8969 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8970 free_stack(common, 1);
8972 alt_max = 2;
8973 alt1 = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8976 else if (has_alternatives)
8978 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8979 free_stack(common, 1);
8981 if (alt_max > 4)
8983 /* Table jump if alt_max is greater than 4. */
8984 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)common->read_only_data_ptr);
8985 add_label_addr(common);
8987 else
8989 if (alt_max == 4)
8990 alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
8991 alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8995 COMPILE_BACKTRACKINGPATH(current->top);
8996 if (current->topbacktracks)
8997 set_jumps(current->topbacktracks, LABEL());
8999 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9001 /* Conditional block always has at most one alternative. */
9002 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
9004 SLJIT_ASSERT(has_alternatives);
9005 assert = CURRENT_AS(bracket_backtrack)->u.assert;
9006 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
9008 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
9009 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9010 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9012 cond = JUMP(SLJIT_JUMP);
9013 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
9015 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
9017 SLJIT_ASSERT(has_alternatives);
9018 cond = JUMP(SLJIT_JUMP);
9019 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
9021 else
9022 SLJIT_ASSERT(!has_alternatives);
9025 if (has_alternatives)
9027 alt_count = sizeof(sljit_uw);
9030 current->top = NULL;
9031 current->topbacktracks = NULL;
9032 current->nextbacktracks = NULL;
9033 /* Conditional blocks always have an additional alternative, even if it is empty. */
9034 if (*cc == OP_ALT)
9036 ccprev = cc + 1 + LINK_SIZE;
9037 cc += GET(cc, 1);
9038 if (opcode != OP_COND && opcode != OP_SCOND)
9040 if (opcode != OP_ONCE)
9042 if (private_data_ptr != 0)
9043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
9044 else
9045 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9047 else
9048 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
9050 compile_matchingpath(common, ccprev, cc, current);
9051 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9052 return;
9055 /* Instructions after the current alternative is successfully matched. */
9056 /* There is a similar code in compile_bracket_matchingpath. */
9057 if (opcode == OP_ONCE)
9058 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9060 stacksize = 0;
9061 if (repeat_type == OP_MINUPTO)
9063 /* We need to preserve the counter. TMP2 will be used below. */
9064 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
9065 stacksize++;
9067 if (ket != OP_KET || bra != OP_BRA)
9068 stacksize++;
9069 if (offset != 0)
9071 if (common->capture_last_ptr != 0)
9072 stacksize++;
9073 if (common->optimized_cbracket[offset >> 1] == 0)
9074 stacksize += 2;
9076 if (opcode != OP_ONCE)
9077 stacksize++;
9079 if (stacksize > 0)
9080 allocate_stack(common, stacksize);
9082 stacksize = 0;
9083 if (repeat_type == OP_MINUPTO)
9085 /* TMP2 was set above. */
9086 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9087 stacksize++;
9090 if (ket != OP_KET || bra != OP_BRA)
9092 if (ket != OP_KET)
9093 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9094 else
9095 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9096 stacksize++;
9099 if (offset != 0)
9100 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9102 if (opcode != OP_ONCE)
9103 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9105 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9107 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
9108 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
9112 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
9114 if (opcode != OP_ONCE)
9116 if (alt_max > 4)
9117 add_label_addr(common);
9118 else
9120 if (alt_count != 2 * sizeof(sljit_uw))
9122 JUMPHERE(alt1);
9123 if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9124 alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9126 else
9128 JUMPHERE(alt2);
9129 if (alt_max == 4)
9130 alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9133 alt_count += sizeof(sljit_uw);
9136 COMPILE_BACKTRACKINGPATH(current->top);
9137 if (current->topbacktracks)
9138 set_jumps(current->topbacktracks, LABEL());
9139 SLJIT_ASSERT(!current->nextbacktracks);
9141 while (*cc == OP_ALT);
9143 if (cond != NULL)
9145 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
9146 assert = CURRENT_AS(bracket_backtrack)->u.assert;
9147 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
9149 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
9150 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9151 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9153 JUMPHERE(cond);
9156 /* Free the STR_PTR. */
9157 if (private_data_ptr == 0)
9158 free_stack(common, 1);
9161 if (offset != 0)
9163 /* Using both tmp register is better for instruction scheduling. */
9164 if (common->optimized_cbracket[offset >> 1] != 0)
9166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9167 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9168 free_stack(common, 2);
9169 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
9170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
9172 else
9174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9175 free_stack(common, 1);
9176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
9179 else if (opcode == OP_SBRA || opcode == OP_SCOND)
9181 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
9182 free_stack(common, 1);
9184 else if (opcode == OP_ONCE)
9186 cc = ccbegin + GET(ccbegin, 1);
9187 stacksize = needs_control_head ? 1 : 0;
9189 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9191 /* Reset head and drop saved frame. */
9192 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
9194 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
9196 /* The STR_PTR must be released. */
9197 stacksize++;
9199 free_stack(common, stacksize);
9201 JUMPHERE(once);
9202 /* Restore previous private_data_ptr */
9203 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9204 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
9205 else if (ket == OP_KETRMIN)
9207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9208 /* See the comment below. */
9209 free_stack(common, 2);
9210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
9214 if (repeat_type == OP_EXACT)
9216 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
9217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
9218 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
9220 else if (ket == OP_KETRMAX)
9222 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9223 if (bra != OP_BRAZERO)
9224 free_stack(common, 1);
9226 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9227 if (bra == OP_BRAZERO)
9229 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9230 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9231 JUMPHERE(brazero);
9232 free_stack(common, 1);
9235 else if (ket == OP_KETRMIN)
9237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9239 /* OP_ONCE removes everything in case of a backtrack, so we don't
9240 need to explicitly release the STR_PTR. The extra release would
9241 affect badly the free_stack(2) above. */
9242 if (opcode != OP_ONCE)
9243 free_stack(common, 1);
9244 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
9245 if (opcode == OP_ONCE)
9246 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
9247 else if (bra == OP_BRAMINZERO)
9248 free_stack(common, 1);
9250 else if (bra == OP_BRAZERO)
9252 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9253 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9254 JUMPHERE(brazero);
/* Emits the backtracking path of a possessive bracket. The dispatcher
   compile_backtrackingpath calls this for OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS,
   OP_SCBRAPOS and OP_BRAPOSZERO.

   common  - compiler state shared by all code generators.
   current - the backtrack record of the bracket; its fields are read through
             CURRENT_AS(bracketpos_backtrack).

   Nothing is returned; machine code is appended through the OP1 / JUMP /
   free_stack helpers. */
9258 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9260 DEFINE_COMPILER;
9261 int offset;
9262 struct sljit_jump *jump;
/* framesize < 0: none of the frame reverting code further down is emitted for
   this case (presumably no frame was saved by the matching path - TODO
   confirm against compile_bracketpos_matchingpath). */
9264 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
/* Capturing variants only: the saved words on the stack are written back to
   the capture's ovector pair. */
9266 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
/* The value GET2 reads after the link field, doubled, selects the ovector pair. */
9268 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
9269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9270 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9271 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
/* TMP1 is free again after the store above, so it is reused for the third
   saved word (STACK(2)), which goes back into capture_last_ptr below. */
9272 if (common->capture_last_ptr != 0)
9273 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9274 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
9275 if (common->capture_last_ptr != 0)
9276 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
/* Pending backtrack jumps land here, then the bracket's stack words are released. */
9278 set_jumps(current->topbacktracks, LABEL());
9279 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9280 return;
/* Frame case: reload STACK_TOP from the bracket's private data slot and call
   the shared revertframes routine. */
9283 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
9284 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
/* The code reached through topbacktracks additionally frees stacksize words;
   the fall-through path jumps over that free_stack. */
9286 if (current->topbacktracks)
9288 jump = JUMP(SLJIT_JUMP);
9289 set_jumps(current->topbacktracks, LABEL());
9290 /* Drop the stack frame. */
9291 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
9292 JUMPHERE(jump);
/* Write the word found framesize slots above STACK_TOP back into the private
   data slot. */
9294 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
/* Emits the backtracking path of OP_BRAMINZERO. The construct that follows
   the opcode (current->cc[1]) is compiled here by calling the matching-path
   generators by hand.

   common  - compiler state shared by all code generators.
   current - the backtrack record of the OP_BRAMINZERO; its top,
             topbacktracks and nextbacktracks lists are cleared first. */
9297 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9299 assert_backtrack backtrack;
9301 current->top = NULL;
9302 current->topbacktracks = NULL;
9303 current->nextbacktracks = NULL;
/* Opcodes above OP_ASSERTBACK_NOT are handled as brackets, the rest as
   assertions (the same test is used by the OP_BRAZERO case of
   compile_backtrackingpath). */
9304 if (current->cc[1] > OP_ASSERTBACK_NOT)
9306 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
9307 compile_bracket_matchingpath(common, current->cc, current);
9308 compile_bracket_backtrackingpath(common, current->top);
9310 else
/* A zeroed, stack-local assert_backtrack is used as the record for the
   assertion; only its cc and matchingpath fields are filled in. */
9312 memset(&backtrack, 0, sizeof(backtrack));
9313 backtrack.common.cc = current->cc;
9314 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
9315 /* Manual call of compile_assert_matchingpath. */
9316 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
/* Neither path is expected to leave unresolved backtrack jumps on current. */
9318 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
/* Emits the backtracking path of a backtracking control verb. The dispatcher
   compile_backtrackingpath calls this for OP_THEN, OP_THEN_ARG, OP_PRUNE,
   OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG.

   common  - compiler state shared by all code generators.
   current - the backtrack record of the verb; *current->cc is the opcode. */
9321 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9323 DEFINE_COMPILER;
9324 pcre_uchar opcode = *current->cc;
9325 struct sljit_label *loop;
9326 struct sljit_jump *jump;
9328 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
9330 if (common->then_trap != NULL)
9332 SLJIT_ASSERT(common->control_head_ptr != 0);
/* Search loop emitted below: start from the value stored in the control head
   slot, and keep following the word at -sizeof(sljit_sw) until the words at
   -2 and -3 slots equal type_then_trap and then_trap->start. The first
   iteration skips the "follow" step via the jump. */
9334 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
9335 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
9336 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
9337 jump = JUMP(SLJIT_JUMP);
9339 loop = LABEL();
9340 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
9341 JUMPHERE(jump);
9342 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
9343 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
/* Entry found: continue at the trap's quit list (bound in
   compile_then_trap_backtrackingpath). */
9344 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
9345 return;
/* No then trap, but positive_assert is set: jump to positive_assert_quit. */
9347 else if (common->positive_assert)
9349 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
9350 return;
/* local_exit set: jump to the quit label if it already exists, otherwise
   queue the jump on common->quit. */
9354 if (common->local_exit)
9356 if (common->quit_label == NULL)
9357 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9358 else
9359 JUMPTO(SLJIT_JUMP, common->quit_label);
9360 return;
9363 if (opcode == OP_SKIP_ARG)
9365 SLJIT_ASSERT(common->control_head_ptr != 0);
/* Call do_search_mark with TMP1 = control head value and STACK_TOP = address
   of current->cc + 2 (presumably the mark name - TODO confirm). STACK_TOP is
   parked in LOCALS0 around the call because its register carries an
   argument. */
9366 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
9367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
9368 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
9369 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
9370 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* TMP1 is copied into STR_PTR after the call (presumably the call's result);
   any value other than -1 goes to reset_match. */
9372 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9373 add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
9374 return;
/* Remaining verbs: OP_SKIP reloads STR_PTR from the top stack word, all
   others pass 0; both jump to reset_match. */
9377 if (opcode == OP_SKIP)
9378 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9379 else
9380 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
9381 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
/* Emits the backtracking path of the virtual OP_THEN_TRAP opcode.

   common  - compiler state shared by all code generators.
   current - the backtrack record, read as a then_trap_backtrack.

   When the record's then_trap field is set, no code is emitted: the field is
   copied into common->then_trap and the function returns. Otherwise two
   entry paths are generated (normal backtrack, and arrival through the
   record's quit list); both end by storing TMP1 into the control head slot. */
9384 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9386 DEFINE_COMPILER;
9387 struct sljit_jump *jump;
9388 int size;
9390 if (CURRENT_AS(then_trap_backtrack)->then_trap)
9392 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
9393 return;
/* Three words plus the frame words (a negative framesize counts as 0). */
9396 size = CURRENT_AS(then_trap_backtrack)->framesize;
9397 size = 3 + (size < 0 ? 0 : size);
/* Normal path: TMP1 is read from the first of the three words (slot
   size - 3), then everything is released. */
9399 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
9400 free_stack(common, size);
9401 jump = JUMP(SLJIT_JUMP);
/* Quit path: jumps queued on the record's quit list arrive here. */
9403 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
9404 /* STACK_TOP is set by THEN. */
9405 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
9406 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9407 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9408 free_stack(common, 3);
/* Both paths join here with the value for the control head slot in TMP1. */
9410 JUMPHERE(jump);
9411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
/* Emits the backtracking code for a chain of backtrack records.

   common  - compiler state shared by all code generators.
   current - first record to process; the chain is followed through ->prev
             until it is NULL.

   For every record, pending nextbacktracks jumps are bound to a label at the
   current position, and then the generator matching the opcode at
   *current->cc is invoked. common->then_trap is saved on entry and restored
   on exit, so changes made while walking the chain do not leak to the
   caller. */
9414 static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9416 DEFINE_COMPILER;
9417 then_trap_backtrack *save_then_trap = common->then_trap;
9419 while (current)
9421 if (current->nextbacktracks != NULL)
9422 set_jumps(current->nextbacktracks, LABEL());
9423 switch(*current->cc)
/* OP_SET_SOM: pop one word and store it into OVECTOR(0). */
9425 case OP_SET_SOM:
9426 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9427 free_stack(common, 1);
9428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
9429 break;
/* Single character, negated character, character type and class repeats all
   share compile_iterator_backtrackingpath. */
9431 case OP_STAR:
9432 case OP_MINSTAR:
9433 case OP_PLUS:
9434 case OP_MINPLUS:
9435 case OP_QUERY:
9436 case OP_MINQUERY:
9437 case OP_UPTO:
9438 case OP_MINUPTO:
9439 case OP_EXACT:
9440 case OP_POSSTAR:
9441 case OP_POSPLUS:
9442 case OP_POSQUERY:
9443 case OP_POSUPTO:
9444 case OP_STARI:
9445 case OP_MINSTARI:
9446 case OP_PLUSI:
9447 case OP_MINPLUSI:
9448 case OP_QUERYI:
9449 case OP_MINQUERYI:
9450 case OP_UPTOI:
9451 case OP_MINUPTOI:
9452 case OP_EXACTI:
9453 case OP_POSSTARI:
9454 case OP_POSPLUSI:
9455 case OP_POSQUERYI:
9456 case OP_POSUPTOI:
9457 case OP_NOTSTAR:
9458 case OP_NOTMINSTAR:
9459 case OP_NOTPLUS:
9460 case OP_NOTMINPLUS:
9461 case OP_NOTQUERY:
9462 case OP_NOTMINQUERY:
9463 case OP_NOTUPTO:
9464 case OP_NOTMINUPTO:
9465 case OP_NOTEXACT:
9466 case OP_NOTPOSSTAR:
9467 case OP_NOTPOSPLUS:
9468 case OP_NOTPOSQUERY:
9469 case OP_NOTPOSUPTO:
9470 case OP_NOTSTARI:
9471 case OP_NOTMINSTARI:
9472 case OP_NOTPLUSI:
9473 case OP_NOTMINPLUSI:
9474 case OP_NOTQUERYI:
9475 case OP_NOTMINQUERYI:
9476 case OP_NOTUPTOI:
9477 case OP_NOTMINUPTOI:
9478 case OP_NOTEXACTI:
9479 case OP_NOTPOSSTARI:
9480 case OP_NOTPOSPLUSI:
9481 case OP_NOTPOSQUERYI:
9482 case OP_NOTPOSUPTOI:
9483 case OP_TYPESTAR:
9484 case OP_TYPEMINSTAR:
9485 case OP_TYPEPLUS:
9486 case OP_TYPEMINPLUS:
9487 case OP_TYPEQUERY:
9488 case OP_TYPEMINQUERY:
9489 case OP_TYPEUPTO:
9490 case OP_TYPEMINUPTO:
9491 case OP_TYPEEXACT:
9492 case OP_TYPEPOSSTAR:
9493 case OP_TYPEPOSPLUS:
9494 case OP_TYPEPOSQUERY:
9495 case OP_TYPEPOSUPTO:
9496 case OP_CLASS:
9497 case OP_NCLASS:
9498 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
9499 case OP_XCLASS:
9500 #endif
9501 compile_iterator_backtrackingpath(common, current);
9502 break;
/* Back references. */
9504 case OP_REF:
9505 case OP_REFI:
9506 case OP_DNREF:
9507 case OP_DNREFI:
9508 compile_ref_iterator_backtrackingpath(common, current);
9509 break;
9511 case OP_RECURSE:
9512 compile_recurse_backtrackingpath(common, current);
9513 break;
/* Assertions. */
9515 case OP_ASSERT:
9516 case OP_ASSERT_NOT:
9517 case OP_ASSERTBACK:
9518 case OP_ASSERTBACK_NOT:
9519 compile_assert_backtrackingpath(common, current);
9520 break;
/* Non-possessive brackets. */
9522 case OP_ONCE:
9523 case OP_ONCE_NC:
9524 case OP_BRA:
9525 case OP_CBRA:
9526 case OP_COND:
9527 case OP_SBRA:
9528 case OP_SCBRA:
9529 case OP_SCOND:
9530 compile_bracket_backtrackingpath(common, current);
9531 break;
/* OP_BRAZERO prefixes either a bracket or an assertion; the following opcode
   decides which generator is used. */
9533 case OP_BRAZERO:
9534 if (current->cc[1] > OP_ASSERTBACK_NOT)
9535 compile_bracket_backtrackingpath(common, current);
9536 else
9537 compile_assert_backtrackingpath(common, current);
9538 break;
/* Possessive brackets. */
9540 case OP_BRAPOS:
9541 case OP_CBRAPOS:
9542 case OP_SBRAPOS:
9543 case OP_SCBRAPOS:
9544 case OP_BRAPOSZERO:
9545 compile_bracketpos_backtrackingpath(common, current);
9546 break;
9548 case OP_BRAMINZERO:
9549 compile_braminzero_backtrackingpath(common, current);
9550 break;
/* OP_MARK: restore mark_ptr from the stack. With has_skip_arg the saved mark
   sits in slot 4 of a 5 word record whose slot 0 is written back to the
   control head slot; without it a single word is popped. */
9552 case OP_MARK:
9553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
9554 if (common->has_skip_arg)
9555 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9556 free_stack(common, common->has_skip_arg ? 5 : 1);
9557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
9558 if (common->has_skip_arg)
9559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
9560 break;
/* Backtracking control verbs other than COMMIT. */
9562 case OP_THEN:
9563 case OP_THEN_ARG:
9564 case OP_PRUNE:
9565 case OP_PRUNE_ARG:
9566 case OP_SKIP:
9567 case OP_SKIP_ARG:
9568 compile_control_verb_backtrackingpath(common, current);
9569 break;
/* OP_COMMIT: unless local_exit is set, load PCRE_ERROR_NOMATCH into the
   return register; then jump to the quit label, or queue the jump on
   common->quit if the label does not exist yet. */
9571 case OP_COMMIT:
9572 if (!common->local_exit)
9573 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
9574 if (common->quit_label == NULL)
9575 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9576 else
9577 JUMPTO(SLJIT_JUMP, common->quit_label);
9578 break;
/* These opcodes emit no code of their own here; their pending backtrack
   jumps are simply bound to the current position. */
9580 case OP_CALLOUT:
9581 case OP_FAIL:
9582 case OP_ACCEPT:
9583 case OP_ASSERT_ACCEPT:
9584 set_jumps(current->topbacktracks, LABEL());
9585 break;
9587 case OP_THEN_TRAP:
9588 /* A virtual opcode for then traps. */
9589 compile_then_trap_backtrackingpath(common, current);
9590 break;
/* Any other opcode reaching this point is a generator bug. */
9592 default:
9593 SLJIT_ASSERT_STOP();
9594 break;
9596 current = current->prev;
9598 common->then_trap = save_then_trap;
/* Generates the callable body for the recursion target described by
   common->currententry (the bracket at common->start + currententry->start).

   common - compiler state shared by all code generators. The function resets
            common->then_trap, quit_label, accept_label, quit and accept, and
            sets currententry->entry to the label of the generated routine.

   The generated routine is entered with a fast call, saves the private data
   (and, if needed, a frame) on the stack, tries each alternative of the
   bracket in turn, restores the saved data and returns with TMP1 = 1 when an
   alternative matched and TMP1 = 0 otherwise.

   The function returns early, leaving the routine unfinished, whenever the
   sljit compiler reports an error. */
9601 static SLJIT_INLINE void compile_recurse(compiler_common *common)
9603 DEFINE_COMPILER;
9604 pcre_uchar *cc = common->start + common->currententry->start;
/* First opcode inside the bracket: skip the opcode, the link field and, for
   anything other than OP_BRA, an IMM2_SIZE field (presumably the capture
   number - TODO confirm). */
9605 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
9606 pcre_uchar *ccend = bracketend(cc);
9607 BOOL needs_control_head;
9608 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
9609 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
9610 int alternativesize;
9611 BOOL needs_frame;
9612 backtrack_common altbacktrack;
9613 struct sljit_jump *jump;
9615 /* Recurse captures then. */
9616 common->then_trap = NULL;
9618 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means no frame; it is normalised to 0 so it can be
   used in the size sums below. */
9619 needs_frame = framesize >= 0;
9620 if (!needs_frame)
9621 framesize = 0;
/* One extra stack word is reserved when the bracket has more than one
   alternative; STR_PTR is stored there so later alternatives can reload it. */
9622 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
9624 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
/* The routine starts here; calls queued before this point are bound to it. */
9625 common->currententry->entry = LABEL();
9626 set_jumps(common->currententry->calls, common->currententry->entry);
/* Prologue: TMP2 receives the fast-call return address (it is stored in the
   highest allocated slot and consumed by sljit_emit_fast_return at the end). */
9628 sljit_emit_fast_enter(compiler, TMP2, 0);
9629 allocate_stack(common, private_data_size + framesize + alternativesize);
9630 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
9631 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9632 if (needs_control_head)
9633 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
/* Remember the stack position of this invocation; the quit and accept paths
   below reload STACK_TOP from this slot. */
9634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0);
9635 if (needs_frame)
9636 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
9638 if (alternativesize > 0)
9639 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* Fresh backtrack root and fresh quit / accept lists for this routine. */
9641 memset(&altbacktrack, 0, sizeof(backtrack_common));
9642 common->quit_label = NULL;
9643 common->accept_label = NULL;
9644 common->quit = NULL;
9645 common->accept = NULL;
9646 altbacktrack.cc = ccbegin;
9647 cc += GET(cc, 1);
/* One iteration per alternative: cc points at the OP_ALT / closing opcode
   that ends the alternative starting at altbacktrack.cc. */
9648 while (1)
9650 altbacktrack.top = NULL;
9651 altbacktrack.topbacktracks = NULL;
/* Every alternative after the first restarts from the saved STR_PTR. */
9653 if (altbacktrack.cc != ccbegin)
9654 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9656 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
9657 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9658 return;
/* Reaching the end of the alternative's matching path means success. */
9660 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9662 compile_backtrackingpath(common, altbacktrack.top);
9663 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9664 return;
9665 set_jumps(altbacktrack.topbacktracks, LABEL());
9667 if (*cc != OP_ALT)
9668 break;
9670 altbacktrack.cc = cc + 1 + LINK_SIZE;
9671 cc += GET(cc, 1);
9674 /* None of them matched. */
/* TMP3 carries the result (0 = no match, 1 = match) to the epilogue. */
9675 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9676 jump = JUMP(SLJIT_JUMP);
/* Quit path, only emitted when something queued a jump on common->quit:
   reload STACK_TOP, revert the frame if there is one, report failure, and
   continue at the epilogue through a new quit list. */
9678 if (common->quit != NULL)
9680 set_jumps(common->quit, LABEL());
9681 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
9682 if (needs_frame)
/* revertframes is called with STACK_TOP temporarily lowered by the frame and
   alternative words, then STACK_TOP is moved back. */
9684 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9685 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9686 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9688 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9689 common->quit = NULL;
9690 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
/* Accept path: same stack and frame handling as above, but the result is 1. */
9693 set_jumps(common->accept, LABEL());
9694 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
9695 if (needs_frame)
9697 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9698 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9699 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9701 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
/* Epilogue shared by the no-match, quit and accept paths. */
9703 JUMPHERE(jump);
9704 if (common->quit != NULL)
9705 set_jumps(common->quit, LABEL());
9706 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9707 free_stack(common, private_data_size + framesize + alternativesize);
/* Restore recursive_head_ptr (and control_head_ptr when it is in use) from
   the words at fixed offsets from STACK_TOP, and move the result from TMP3
   to TMP1. */
9708 if (needs_control_head)
9710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
9711 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9712 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0);
9713 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9714 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
9716 else
9718 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9719 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0);
/* Return through the address stored at STACK_TOP. */
9722 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
/* The two helper macros used by the backtracking path generators above are
   removed here, so the code that follows cannot use them. */
9725 #undef COMPILE_BACKTRACKINGPATH
9726 #undef CURRENT_AS
9728 void
9729 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
9731 struct sljit_compiler *compiler;
9732 backtrack_common rootbacktrack;
9733 compiler_common common_data;
9734 compiler_common *common = &common_data;
9735 const pcre_uint8 *tables = re->tables;
9736 pcre_study_data *study;
9737 int private_data_size;
9738 pcre_uchar *ccend;
9739 executable_functions *functions;
9740 void *executable_func;
9741 sljit_uw executable_size;
9742 sljit_uw total_length;
9743 label_addr_list *label_addr;
9744 struct sljit_label *mainloop_label = NULL;
9745 struct sljit_label *continue_match_label;
9746 struct sljit_label *empty_match_found_label = NULL;
9747 struct sljit_label *empty_match_backtrack_label = NULL;
9748 struct sljit_label *reset_match_label;
9749 struct sljit_label *quit_label;
9750 struct sljit_jump *jump;
9751 struct sljit_jump *minlength_check_failed = NULL;
9752 struct sljit_jump *reqbyte_notfound = NULL;
9753 struct sljit_jump *empty_match = NULL;
9755 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9756 study = extra->study_data;
9758 if (!tables)
9759 tables = PRIV(default_tables);
9761 memset(&rootbacktrack, 0, sizeof(backtrack_common));
9762 memset(common, 0, sizeof(compiler_common));
9763 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9765 common->start = rootbacktrack.cc;
9766 common->read_only_data = NULL;
9767 common->read_only_data_size = 0;
9768 common->read_only_data_ptr = NULL;
9769 common->fcc = tables + fcc_offset;
9770 common->lcc = (sljit_sw)(tables + lcc_offset);
9771 common->mode = mode;
9772 common->might_be_empty = study->minlength == 0;
9773 common->nltype = NLTYPE_FIXED;
9774 switch(re->options & PCRE_NEWLINE_BITS)
9776 case 0:
9777 /* Compile-time default */
9778 switch(NEWLINE)
9780 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9781 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9782 default: common->newline = NEWLINE; break;
9784 break;
9785 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9786 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9787 case PCRE_NEWLINE_CR+
9788 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9789 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9790 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9791 default: return;
9793 common->nlmax = READ_CHAR_MAX;
9794 common->nlmin = 0;
9795 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9796 common->bsr_nltype = NLTYPE_ANYCRLF;
9797 else if ((re->options & PCRE_BSR_UNICODE) != 0)
9798 common->bsr_nltype = NLTYPE_ANY;
9799 else
9801 #ifdef BSR_ANYCRLF
9802 common->bsr_nltype = NLTYPE_ANYCRLF;
9803 #else
9804 common->bsr_nltype = NLTYPE_ANY;
9805 #endif
9807 common->bsr_nlmax = READ_CHAR_MAX;
9808 common->bsr_nlmin = 0;
9809 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9810 common->ctypes = (sljit_sw)(tables + ctypes_offset);
9811 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9812 common->name_count = re->name_count;
9813 common->name_entry_size = re->name_entry_size;
9814 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
9815 #ifdef SUPPORT_UTF
9816 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
9817 common->utf = (re->options & PCRE_UTF8) != 0;
9818 #ifdef SUPPORT_UCP
9819 common->use_ucp = (re->options & PCRE_UCP) != 0;
9820 #endif
9821 if (common->utf)
9823 if (common->nltype == NLTYPE_ANY)
9824 common->nlmax = 0x2029;
9825 else if (common->nltype == NLTYPE_ANYCRLF)
9826 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9827 else
9829 /* We only care about the first newline character. */
9830 common->nlmax = common->newline & 0xff;
9833 if (common->nltype == NLTYPE_FIXED)
9834 common->nlmin = common->newline & 0xff;
9835 else
9836 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9838 if (common->bsr_nltype == NLTYPE_ANY)
9839 common->bsr_nlmax = 0x2029;
9840 else
9841 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9842 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9844 #endif /* SUPPORT_UTF */
9845 ccend = bracketend(common->start);
9847 /* Calculate the local space size on the stack. */
9848 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
9849 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);
9850 if (!common->optimized_cbracket)
9851 return;
9852 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9853 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9854 #else
9855 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9856 #endif
9858 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9859 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9860 common->capture_last_ptr = common->ovector_start;
9861 common->ovector_start += sizeof(sljit_sw);
9862 #endif
9863 if (!check_opcode_types(common, common->start, ccend))
9865 SLJIT_FREE(common->optimized_cbracket);
9866 return;
9869 /* Checking flags and updating ovector_start. */
9870 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9872 common->req_char_ptr = common->ovector_start;
9873 common->ovector_start += sizeof(sljit_sw);
9875 if (mode != JIT_COMPILE)
9877 common->start_used_ptr = common->ovector_start;
9878 common->ovector_start += sizeof(sljit_sw);
9879 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9881 common->hit_start = common->ovector_start;
9882 common->ovector_start += 2 * sizeof(sljit_sw);
9884 else
9886 SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9887 common->needs_start_ptr = TRUE;
9890 if ((re->options & PCRE_FIRSTLINE) != 0)
9892 common->first_line_end = common->ovector_start;
9893 common->ovector_start += sizeof(sljit_sw);
9895 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9896 common->control_head_ptr = 1;
9897 #endif
9898 if (common->control_head_ptr != 0)
9900 common->control_head_ptr = common->ovector_start;
9901 common->ovector_start += sizeof(sljit_sw);
9903 if (common->needs_start_ptr && common->has_set_som)
9905 /* Saving the real start pointer is necessary. */
9906 common->start_ptr = common->ovector_start;
9907 common->ovector_start += sizeof(sljit_sw);
9909 else
9910 common->needs_start_ptr = FALSE;
9912 /* Aligning ovector to even number of sljit words. */
9913 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9914 common->ovector_start += sizeof(sljit_sw);
9916 if (common->start_ptr == 0)
9917 common->start_ptr = OVECTOR(0);
9919 /* Capturing brackets cannot be optimized if callouts are allowed. */
9920 if (common->capture_last_ptr != 0)
9921 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9923 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9924 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9926 total_length = ccend - common->start;
9927 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)));
9928 if (!common->private_data_ptrs)
9930 SLJIT_FREE(common->optimized_cbracket);
9931 return;
9933 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9935 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9936 set_private_data_ptrs(common, &private_data_size, ccend);
9937 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
9939 SLJIT_FREE(common->private_data_ptrs);
9940 SLJIT_FREE(common->optimized_cbracket);
9941 return;
9944 if (common->has_then)
9946 common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
9947 memset(common->then_offsets, 0, total_length);
9948 set_then_offsets(common, common->start, NULL);
9951 if (common->read_only_data_size > 0)
9953 common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
9954 if (common->read_only_data == NULL)
9956 SLJIT_FREE(common->optimized_cbracket);
9957 SLJIT_FREE(common->private_data_ptrs);
9958 return;
9960 common->read_only_data_ptr = common->read_only_data;
9963 compiler = sljit_create_compiler();
9964 if (!compiler)
9966 SLJIT_FREE(common->optimized_cbracket);
9967 SLJIT_FREE(common->private_data_ptrs);
9968 if (common->read_only_data)
9969 SLJIT_FREE(common->read_only_data);
9970 return;
9972 common->compiler = compiler;
9974 /* Main pcre_jit_exec entry. */
9975 sljit_emit_enter(compiler, 1, 5, 5, private_data_size);
9977 /* Register init. */
9978 reset_ovector(common, (re->top_bracket + 1) * 2);
9979 if (common->req_char_ptr != 0)
9980 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0);
9982 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
9983 OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
9984 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9985 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
9986 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9987 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
9988 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
9989 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
9990 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0);
9992 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9993 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
9994 if (common->mark_ptr != 0)
9995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
9996 if (common->control_head_ptr != 0)
9997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
9999 /* Main part of the matching */
10000 if ((re->options & PCRE_ANCHORED) == 0)
10002 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10003 continue_match_label = LABEL();
10004 /* Forward search if possible. */
10005 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10007 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10009 /* If read_only_data is reallocated, we might have an allocation failure. */
10010 if (common->read_only_data_size > 0 && common->read_only_data == NULL)
10012 sljit_free_compiler(compiler);
10013 SLJIT_FREE(common->optimized_cbracket);
10014 SLJIT_FREE(common->private_data_ptrs);
10015 return;
10018 else if ((re->flags & PCRE_FIRSTSET) != 0)
10019 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10020 else if ((re->flags & PCRE_STARTLINE) != 0)
10021 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10022 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10023 fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10026 else
10027 continue_match_label = LABEL();
10029 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10031 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10032 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
10033 minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0);
10035 if (common->req_char_ptr != 0)
10036 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
10038 /* Store the current STR_PTR in OVECTOR(0). */
10039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
10040 /* Copy the limit of allowed recursions. */
10041 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH);
10042 if (common->capture_last_ptr != 0)
10043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);
10045 if (common->needs_start_ptr)
10047 SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
10048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0);
10050 else
10051 SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
10053 /* Copy the beginning of the string. */
10054 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10056 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
10057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
10058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
10059 JUMPHERE(jump);
10061 else if (mode == JIT_PARTIAL_HARD_COMPILE)
10062 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
10064 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10065 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10067 sljit_free_compiler(compiler);
10068 SLJIT_FREE(common->optimized_cbracket);
10069 SLJIT_FREE(common->private_data_ptrs);
10070 if (common->read_only_data)
10071 SLJIT_FREE(common->read_only_data);
10072 return;
10075 if (common->might_be_empty)
10077 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
10078 empty_match_found_label = LABEL();
10081 common->accept_label = LABEL();
10082 if (common->accept != NULL)
10083 set_jumps(common->accept, common->accept_label);
10085 /* This means we have a match. Update the ovector. */
10086 copy_ovector(common, re->top_bracket + 1);
10087 common->quit_label = common->forced_quit_label = LABEL();
10088 if (common->quit != NULL)
10089 set_jumps(common->quit, common->quit_label);
10090 if (common->forced_quit != NULL)
10091 set_jumps(common->forced_quit, common->forced_quit_label);
10092 if (minlength_check_failed != NULL)
10093 SET_LABEL(minlength_check_failed, common->forced_quit_label);
10094 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
10096 if (mode != JIT_COMPILE)
10098 common->partialmatchlabel = LABEL();
10099 set_jumps(common->partialmatch, common->partialmatchlabel);
10100 return_with_partial_match(common, common->quit_label);
10103 if (common->might_be_empty)
10104 empty_match_backtrack_label = LABEL();
10105 compile_backtrackingpath(common, rootbacktrack.top);
10106 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10108 sljit_free_compiler(compiler);
10109 SLJIT_FREE(common->optimized_cbracket);
10110 SLJIT_FREE(common->private_data_ptrs);
10111 if (common->read_only_data)
10112 SLJIT_FREE(common->read_only_data);
10113 return;
10116 SLJIT_ASSERT(rootbacktrack.prev == NULL);
10117 reset_match_label = LABEL();
10119 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10121 /* Update hit_start only in the first time. */
10122 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
10123 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
10124 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
10125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
10126 JUMPHERE(jump);
10129 /* Check we have remaining characters. */
10130 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
10132 SLJIT_ASSERT(common->first_line_end != 0);
10133 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
10136 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
10138 if ((re->options & PCRE_ANCHORED) == 0)
10140 if (common->ff_newline_shortcut != NULL)
10142 if ((re->options & PCRE_FIRSTLINE) == 0)
10143 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10144 /* There cannot be more newlines here. */
10146 else
10148 if ((re->options & PCRE_FIRSTLINE) == 0)
10149 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10150 else
10151 CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10155 /* No more remaining characters. */
10156 if (reqbyte_notfound != NULL)
10157 JUMPHERE(reqbyte_notfound);
10159 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10160 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
10162 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10163 JUMPTO(SLJIT_JUMP, common->quit_label);
10165 flush_stubs(common);
10167 if (common->might_be_empty)
10169 JUMPHERE(empty_match);
10170 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10171 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10172 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10173 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10174 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10175 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10176 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10177 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10180 common->currententry = common->entries;
10181 common->local_exit = TRUE;
10182 quit_label = common->quit_label;
10183 while (common->currententry != NULL)
10185 /* Might add new entries. */
10186 compile_recurse(common);
10187 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10189 sljit_free_compiler(compiler);
10190 SLJIT_FREE(common->optimized_cbracket);
10191 SLJIT_FREE(common->private_data_ptrs);
10192 if (common->read_only_data)
10193 SLJIT_FREE(common->read_only_data);
10194 return;
10196 flush_stubs(common);
10197 common->currententry = common->currententry->next;
10199 common->local_exit = FALSE;
10200 common->quit_label = quit_label;
10202 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
10203 /* This is a (really) rare case. */
10204 set_jumps(common->stackalloc, LABEL());
10205 /* RETURN_ADDR is not a saved register. */
10206 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
10207 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
10208 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10209 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10210 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
10211 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
10213 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
10214 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10215 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10217 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
10218 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
10219 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
10220 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
10222 /* Allocation failed. */
10223 JUMPHERE(jump);
10224 /* We break the return address cache here, but this is a really rare case. */
10225 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
10226 JUMPTO(SLJIT_JUMP, common->quit_label);
10228 /* Call limit reached. */
10229 set_jumps(common->calllimit, LABEL());
10230 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
10231 JUMPTO(SLJIT_JUMP, common->quit_label);
10233 if (common->revertframes != NULL)
10235 set_jumps(common->revertframes, LABEL());
10236 do_revertframes(common);
10238 if (common->wordboundary != NULL)
10240 set_jumps(common->wordboundary, LABEL());
10241 check_wordboundary(common);
10243 if (common->anynewline != NULL)
10245 set_jumps(common->anynewline, LABEL());
10246 check_anynewline(common);
10248 if (common->hspace != NULL)
10250 set_jumps(common->hspace, LABEL());
10251 check_hspace(common);
10253 if (common->vspace != NULL)
10255 set_jumps(common->vspace, LABEL());
10256 check_vspace(common);
10258 if (common->casefulcmp != NULL)
10260 set_jumps(common->casefulcmp, LABEL());
10261 do_casefulcmp(common);
10263 if (common->caselesscmp != NULL)
10265 set_jumps(common->caselesscmp, LABEL());
10266 do_caselesscmp(common);
10268 if (common->reset_match != NULL)
10270 set_jumps(common->reset_match, LABEL());
10271 do_reset_match(common, (re->top_bracket + 1) * 2);
10272 CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
10273 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10274 JUMPTO(SLJIT_JUMP, reset_match_label);
10276 #ifdef SUPPORT_UTF
10277 #ifdef COMPILE_PCRE8
10278 if (common->utfreadchar != NULL)
10280 set_jumps(common->utfreadchar, LABEL());
10281 do_utfreadchar(common);
10283 if (common->utfreadchar16 != NULL)
10285 set_jumps(common->utfreadchar16, LABEL());
10286 do_utfreadchar16(common);
10288 if (common->utfreadtype8 != NULL)
10290 set_jumps(common->utfreadtype8, LABEL());
10291 do_utfreadtype8(common);
10293 #endif /* COMPILE_PCRE8 */
10294 #endif /* SUPPORT_UTF */
10295 #ifdef SUPPORT_UCP
10296 if (common->getucd != NULL)
10298 set_jumps(common->getucd, LABEL());
10299 do_getucd(common);
10301 #endif
10303 SLJIT_ASSERT(common->read_only_data + (common->read_only_data_size >> SLJIT_WORD_SHIFT) == common->read_only_data_ptr);
10304 SLJIT_FREE(common->optimized_cbracket);
10305 SLJIT_FREE(common->private_data_ptrs);
10307 executable_func = sljit_generate_code(compiler);
10308 executable_size = sljit_get_generated_code_size(compiler);
10309 label_addr = common->label_addrs;
10310 while (label_addr != NULL)
10312 *label_addr->addr = sljit_get_label_addr(label_addr->label);
10313 label_addr = label_addr->next;
10315 sljit_free_compiler(compiler);
10316 if (executable_func == NULL)
10318 if (common->read_only_data)
10319 SLJIT_FREE(common->read_only_data);
10320 return;
10323 /* Reuse the function descriptor if possible. */
10324 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
10325 functions = (executable_functions *)extra->executable_jit;
10326 else
10328 /* Note: If your memory-checker has flagged the allocation below as a
10329 * memory leak, it is probably because you either forgot to call
10330 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
10331 * pcre16_extra) object, or you called said function after having
10332 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
10333 * of the object. (The function will only free the JIT data if the
10334 * bit remains set, as the bit indicates that the pointer to the data
10335 * is valid.)
10337 functions = SLJIT_MALLOC(sizeof(executable_functions));
10338 if (functions == NULL)
10340 /* This case is highly unlikely since we just recently
10341 freed a lot of memory. Not impossible though. */
10342 sljit_free_code(executable_func);
10343 if (common->read_only_data)
10344 SLJIT_FREE(common->read_only_data);
10345 return;
10347 memset(functions, 0, sizeof(executable_functions));
10348 functions->top_bracket = (re->top_bracket + 1) * 2;
10349 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
10350 extra->executable_jit = functions;
10351 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
10354 functions->executable_funcs[mode] = executable_func;
10355 functions->read_only_data[mode] = common->read_only_data;
10356 functions->executable_sizes[mode] = executable_size;
10359 static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func)
10361 union {
10362 void* executable_func;
10363 jit_function call_executable_func;
10364 } convert_executable_func;
10365 pcre_uint8 local_space[MACHINE_STACK_SIZE];
10366 struct sljit_stack local_stack;
10368 local_stack.top = (sljit_sw)&local_space;
10369 local_stack.base = local_stack.top;
10370 local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
10371 local_stack.max_limit = local_stack.limit;
10372 arguments->stack = &local_stack;
10373 convert_executable_func.executable_func = executable_func;
10374 return convert_executable_func.call_executable_func(arguments);
10378 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
10379 int length, int start_offset, int options, int *offsets, int offset_count)
10381 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10382 union {
10383 void* executable_func;
10384 jit_function call_executable_func;
10385 } convert_executable_func;
10386 jit_arguments arguments;
10387 int max_offset_count;
10388 int retval;
10389 int mode = JIT_COMPILE;
10391 if ((options & PCRE_PARTIAL_HARD) != 0)
10392 mode = JIT_PARTIAL_HARD_COMPILE;
10393 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10394 mode = JIT_PARTIAL_SOFT_COMPILE;
10396 if (functions->executable_funcs[mode] == NULL)
10397 return PCRE_ERROR_JIT_BADOPTION;
10399 /* Sanity checks should be handled by pcre_exec. */
10400 arguments.str = subject + start_offset;
10401 arguments.begin = subject;
10402 arguments.end = subject + length;
10403 arguments.mark_ptr = NULL;
10404 /* JIT decreases this value less frequently than the interpreter. */
10405 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10406 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10407 arguments.limit_match = functions->limit_match;
10408 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10409 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10410 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10411 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10412 arguments.offsets = offsets;
10413 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10414 arguments.real_offset_count = offset_count;
10416 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10417 the output vector for storing captured strings, with the remainder used as
10418 workspace. We don't need the workspace here. For compatibility, we limit the
10419 number of captured strings in the same way as pcre_exec(), so that the user
10420 gets the same result with and without JIT. */
10422 if (offset_count != 2)
10423 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10424 max_offset_count = functions->top_bracket;
10425 if (offset_count > max_offset_count)
10426 offset_count = max_offset_count;
10427 arguments.offset_count = offset_count;
10429 if (functions->callback)
10430 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
10431 else
10432 arguments.stack = (struct sljit_stack *)functions->userdata;
10434 if (arguments.stack == NULL)
10435 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
10436 else
10438 convert_executable_func.executable_func = functions->executable_funcs[mode];
10439 retval = convert_executable_func.call_executable_func(&arguments);
10442 if (retval * 2 > offset_count)
10443 retval = 0;
10444 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10445 *(extra_data->mark) = arguments.mark_ptr;
10447 return retval;
10450 #if defined COMPILE_PCRE8
10451 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10452 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
10453 PCRE_SPTR subject, int length, int start_offset, int options,
10454 int *offsets, int offset_count, pcre_jit_stack *stack)
10455 #elif defined COMPILE_PCRE16
10456 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10457 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
10458 PCRE_SPTR16 subject, int length, int start_offset, int options,
10459 int *offsets, int offset_count, pcre16_jit_stack *stack)
10460 #elif defined COMPILE_PCRE32
10461 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10462 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
10463 PCRE_SPTR32 subject, int length, int start_offset, int options,
10464 int *offsets, int offset_count, pcre32_jit_stack *stack)
10465 #endif
10467 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
10468 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10469 union {
10470 void* executable_func;
10471 jit_function call_executable_func;
10472 } convert_executable_func;
10473 jit_arguments arguments;
10474 int max_offset_count;
10475 int retval;
10476 int mode = JIT_COMPILE;
10478 SLJIT_UNUSED_ARG(argument_re);
10480 /* Plausibility checks */
10481 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
10483 if ((options & PCRE_PARTIAL_HARD) != 0)
10484 mode = JIT_PARTIAL_HARD_COMPILE;
10485 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10486 mode = JIT_PARTIAL_SOFT_COMPILE;
10488 if (functions->executable_funcs[mode] == NULL)
10489 return PCRE_ERROR_JIT_BADOPTION;
10491 /* Sanity checks should be handled by pcre_exec. */
10492 arguments.stack = (struct sljit_stack *)stack;
10493 arguments.str = subject_ptr + start_offset;
10494 arguments.begin = subject_ptr;
10495 arguments.end = subject_ptr + length;
10496 arguments.mark_ptr = NULL;
10497 /* JIT decreases this value less frequently than the interpreter. */
10498 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
10499 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10500 arguments.limit_match = functions->limit_match;
10501 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10502 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10503 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10504 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10505 arguments.offsets = offsets;
10506 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10507 arguments.real_offset_count = offset_count;
10509 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10510 the output vector for storing captured strings, with the remainder used as
10511 workspace. We don't need the workspace here. For compatibility, we limit the
10512 number of captured strings in the same way as pcre_exec(), so that the user
10513 gets the same result with and without JIT. */
10515 if (offset_count != 2)
10516 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10517 max_offset_count = functions->top_bracket;
10518 if (offset_count > max_offset_count)
10519 offset_count = max_offset_count;
10520 arguments.offset_count = offset_count;
10522 convert_executable_func.executable_func = functions->executable_funcs[mode];
10523 retval = convert_executable_func.call_executable_func(&arguments);
10525 if (retval * 2 > offset_count)
10526 retval = 0;
10527 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10528 *(extra_data->mark) = arguments.mark_ptr;
10530 return retval;
10533 void
10534 PRIV(jit_free)(void *executable_funcs)
10536 int i;
10537 executable_functions *functions = (executable_functions *)executable_funcs;
10538 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10540 if (functions->executable_funcs[i] != NULL)
10541 sljit_free_code(functions->executable_funcs[i]);
10542 if (functions->read_only_data[i] != NULL)
10543 SLJIT_FREE(functions->read_only_data[i]);
10545 SLJIT_FREE(functions);
10549 PRIV(jit_get_size)(void *executable_funcs)
10551 int i;
10552 sljit_uw size = 0;
10553 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
10554 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10555 size += executable_sizes[i];
10556 return (int)size;
10559 const char*
10560 PRIV(jit_get_target)(void)
10562 return sljit_get_platform_name();
10565 #if defined COMPILE_PCRE8
10566 PCRE_EXP_DECL pcre_jit_stack *
10567 pcre_jit_stack_alloc(int startsize, int maxsize)
10568 #elif defined COMPILE_PCRE16
10569 PCRE_EXP_DECL pcre16_jit_stack *
10570 pcre16_jit_stack_alloc(int startsize, int maxsize)
10571 #elif defined COMPILE_PCRE32
10572 PCRE_EXP_DECL pcre32_jit_stack *
10573 pcre32_jit_stack_alloc(int startsize, int maxsize)
10574 #endif
10576 if (startsize < 1 || maxsize < 1)
10577 return NULL;
10578 if (startsize > maxsize)
10579 startsize = maxsize;
10580 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10581 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10582 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize);
10585 #if defined COMPILE_PCRE8
10586 PCRE_EXP_DECL void
10587 pcre_jit_stack_free(pcre_jit_stack *stack)
10588 #elif defined COMPILE_PCRE16
10589 PCRE_EXP_DECL void
10590 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10591 #elif defined COMPILE_PCRE32
10592 PCRE_EXP_DECL void
10593 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10594 #endif
10596 sljit_free_stack((struct sljit_stack *)stack);
10599 #if defined COMPILE_PCRE8
10600 PCRE_EXP_DECL void
10601 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10602 #elif defined COMPILE_PCRE16
10603 PCRE_EXP_DECL void
10604 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10605 #elif defined COMPILE_PCRE32
10606 PCRE_EXP_DECL void
10607 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10608 #endif
10610 executable_functions *functions;
10611 if (extra != NULL &&
10612 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
10613 extra->executable_jit != NULL)
10615 functions = (executable_functions *)extra->executable_jit;
10616 functions->callback = callback;
10617 functions->userdata = userdata;
10621 #if defined COMPILE_PCRE8
10622 PCRE_EXP_DECL void
10623 pcre_jit_free_unused_memory(void)
10624 #elif defined COMPILE_PCRE16
10625 PCRE_EXP_DECL void
10626 pcre16_jit_free_unused_memory(void)
10627 #elif defined COMPILE_PCRE32
10628 PCRE_EXP_DECL void
10629 pcre32_jit_free_unused_memory(void)
10630 #endif
10632 sljit_free_unused_memory_exec();
10635 #else /* SUPPORT_JIT */
10637 /* These are dummy functions to avoid linking errors when JIT support is not
10638 being compiled. */
10640 #if defined COMPILE_PCRE8
10641 PCRE_EXP_DECL pcre_jit_stack *
10642 pcre_jit_stack_alloc(int startsize, int maxsize)
10643 #elif defined COMPILE_PCRE16
10644 PCRE_EXP_DECL pcre16_jit_stack *
10645 pcre16_jit_stack_alloc(int startsize, int maxsize)
10646 #elif defined COMPILE_PCRE32
10647 PCRE_EXP_DECL pcre32_jit_stack *
10648 pcre32_jit_stack_alloc(int startsize, int maxsize)
10649 #endif
10651 (void)startsize;
10652 (void)maxsize;
10653 return NULL;
10656 #if defined COMPILE_PCRE8
10657 PCRE_EXP_DECL void
10658 pcre_jit_stack_free(pcre_jit_stack *stack)
10659 #elif defined COMPILE_PCRE16
10660 PCRE_EXP_DECL void
10661 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10662 #elif defined COMPILE_PCRE32
10663 PCRE_EXP_DECL void
10664 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10665 #endif
10667 (void)stack;
10670 #if defined COMPILE_PCRE8
10671 PCRE_EXP_DECL void
10672 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10673 #elif defined COMPILE_PCRE16
10674 PCRE_EXP_DECL void
10675 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10676 #elif defined COMPILE_PCRE32
10677 PCRE_EXP_DECL void
10678 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10679 #endif
10681 (void)extra;
10682 (void)callback;
10683 (void)userdata;
10686 #if defined COMPILE_PCRE8
10687 PCRE_EXP_DECL void
10688 pcre_jit_free_unused_memory(void)
10689 #elif defined COMPILE_PCRE16
10690 PCRE_EXP_DECL void
10691 pcre16_jit_free_unused_memory(void)
10692 #elif defined COMPILE_PCRE32
10693 PCRE_EXP_DECL void
10694 pcre32_jit_free_unused_memory(void)
10695 #endif
10699 #endif
10701 /* End of pcre_jit_compile.c */