1 /**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 OnigCaseFoldType OnigDefaultCaseFoldFlag
= ONIGENC_CASE_FOLD_MIN
;
35 extern OnigCaseFoldType
36 onig_get_default_case_fold_flag(void)
38 return OnigDefaultCaseFoldFlag
;
42 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag
)
44 OnigDefaultCaseFoldFlag
= case_fold_flag
;
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf
[WORD_ALIGNMENT_SIZE
];
55 str_dup(UChar
* s
, UChar
* end
)
57 ptrdiff_t len
= end
- s
;
60 UChar
* r
= (UChar
* )xmalloc(len
+ 1);
71 swap_node(Node
* a
, Node
* b
)
74 c
= *a
; *a
= *b
; *b
= c
;
76 if (NTYPE(a
) == NT_STR
) {
77 StrNode
* sn
= NSTR(a
);
79 size_t len
= sn
->end
- sn
->s
;
81 sn
->end
= sn
->s
+ len
;
85 if (NTYPE(b
) == NT_STR
) {
86 StrNode
* sn
= NSTR(b
);
88 size_t len
= sn
->end
- sn
->s
;
90 sn
->end
= sn
->s
+ len
;
96 distance_add(OnigDistance d1
, OnigDistance d2
)
98 if (d1
== ONIG_INFINITE_DISTANCE
|| d2
== ONIG_INFINITE_DISTANCE
)
99 return ONIG_INFINITE_DISTANCE
;
101 if (d1
<= ONIG_INFINITE_DISTANCE
- d2
) return d1
+ d2
;
102 else return ONIG_INFINITE_DISTANCE
;
107 distance_multiply(OnigDistance d
, int m
)
109 if (m
== 0) return 0;
111 if (d
< ONIG_INFINITE_DISTANCE
/ m
)
114 return ONIG_INFINITE_DISTANCE
;
118 bitset_is_empty(BitSetRef bs
)
121 for (i
= 0; i
< BITSET_SIZE
; i
++) {
122 if (bs
[i
] != 0) return 0;
129 bitset_on_num(BitSetRef bs
)
134 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
135 if (BITSET_AT(bs
, i
)) n
++;
141 // Attempt to right size allocated buffers for a regex post compile
143 onig_reg_resize(regex_t
*reg
)
151 else if (reg
->alloc
> reg
->used
) {
152 unsigned char *new_ptr
= xrealloc(reg
->p
, reg
->used
);
153 // Skip the right size optimization if memory allocation fails
155 reg
->alloc
= reg
->used
;
159 } while ((reg
= reg
->chain
) != 0);
163 onig_bbuf_init(BBuf
* buf
, OnigDistance size
)
170 buf
->p
= (UChar
* )xmalloc(size
);
171 if (IS_NULL(buf
->p
)) return(ONIGERR_MEMORY
);
174 buf
->alloc
= (unsigned int )size
;
180 #ifdef USE_SUBEXP_CALL
183 unset_addr_list_init(UnsetAddrList
* uslist
, int size
)
187 p
= (UnsetAddr
* )xmalloc(sizeof(UnsetAddr
)* size
);
188 CHECK_NULL_RETURN_MEMERR(p
);
190 uslist
->alloc
= size
;
196 unset_addr_list_end(UnsetAddrList
* uslist
)
202 unset_addr_list_add(UnsetAddrList
* uslist
, int offset
, struct _Node
* node
)
207 if (uslist
->num
>= uslist
->alloc
) {
208 size
= uslist
->alloc
* 2;
209 p
= (UnsetAddr
* )xrealloc(uslist
->us
, sizeof(UnsetAddr
) * size
);
210 CHECK_NULL_RETURN_MEMERR(p
);
211 uslist
->alloc
= size
;
215 uslist
->us
[uslist
->num
].offset
= offset
;
216 uslist
->us
[uslist
->num
].target
= node
;
220 #endif /* USE_SUBEXP_CALL */
224 add_opcode(regex_t
* reg
, int opcode
)
226 BBUF_ADD1(reg
, opcode
);
230 #ifdef USE_COMBINATION_EXPLOSION_CHECK
232 add_state_check_num(regex_t
* reg
, int num
)
234 StateCheckNumType n
= (StateCheckNumType
)num
;
236 BBUF_ADD(reg
, &n
, SIZE_STATE_CHECK_NUM
);
242 add_rel_addr(regex_t
* reg
, int addr
)
244 RelAddrType ra
= (RelAddrType
)addr
;
246 BBUF_ADD(reg
, &ra
, SIZE_RELADDR
);
251 add_abs_addr(regex_t
* reg
, int addr
)
253 AbsAddrType ra
= (AbsAddrType
)addr
;
255 BBUF_ADD(reg
, &ra
, SIZE_ABSADDR
);
260 add_length(regex_t
* reg
, OnigDistance len
)
262 LengthType l
= (LengthType
)len
;
264 BBUF_ADD(reg
, &l
, SIZE_LENGTH
);
269 add_mem_num(regex_t
* reg
, int num
)
271 MemNumType n
= (MemNumType
)num
;
273 BBUF_ADD(reg
, &n
, SIZE_MEMNUM
);
279 add_pointer(regex_t
* reg
, void* addr
)
281 PointerType ptr
= (PointerType
)addr
;
283 BBUF_ADD(reg
, &ptr
, SIZE_POINTER
);
289 add_option(regex_t
* reg
, OnigOptionType option
)
291 BBUF_ADD(reg
, &option
, SIZE_OPTION
);
296 add_opcode_rel_addr(regex_t
* reg
, int opcode
, int addr
)
300 r
= add_opcode(reg
, opcode
);
302 r
= add_rel_addr(reg
, addr
);
307 add_bytes(regex_t
* reg
, UChar
* bytes
, OnigDistance len
)
309 BBUF_ADD(reg
, bytes
, len
);
314 add_bitset(regex_t
* reg
, BitSetRef bs
)
316 BBUF_ADD(reg
, bs
, SIZE_BITSET
);
321 add_opcode_option(regex_t
* reg
, int opcode
, OnigOptionType option
)
325 r
= add_opcode(reg
, opcode
);
327 r
= add_option(reg
, option
);
331 static int compile_length_tree(Node
* node
, regex_t
* reg
);
332 static int compile_tree(Node
* node
, regex_t
* reg
);
335 #define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
340 select_str_opcode(int mb_len
, OnigDistance byte_len
, int ignore_case
)
343 OnigDistance str_len
= roomof(byte_len
, mb_len
);
347 case 1: op
= OP_EXACT1_IC
; break;
348 default: op
= OP_EXACTN_IC
; break;
355 case 1: op
= OP_EXACT1
; break;
356 case 2: op
= OP_EXACT2
; break;
357 case 3: op
= OP_EXACT3
; break;
358 case 4: op
= OP_EXACT4
; break;
359 case 5: op
= OP_EXACT5
; break;
360 default: op
= OP_EXACTN
; break;
366 case 1: op
= OP_EXACTMB2N1
; break;
367 case 2: op
= OP_EXACTMB2N2
; break;
368 case 3: op
= OP_EXACTMB2N3
; break;
369 default: op
= OP_EXACTMB2N
; break;
386 compile_tree_empty_check(Node
* node
, regex_t
* reg
, int empty_info
)
389 int saved_num_null_check
= reg
->num_null_check
;
391 if (empty_info
!= 0) {
392 r
= add_opcode(reg
, OP_NULL_CHECK_START
);
394 r
= add_mem_num(reg
, reg
->num_null_check
); /* NULL CHECK ID */
396 reg
->num_null_check
++;
399 r
= compile_tree(node
, reg
);
402 if (empty_info
!= 0) {
403 if (empty_info
== NQ_TARGET_IS_EMPTY
)
404 r
= add_opcode(reg
, OP_NULL_CHECK_END
);
405 else if (empty_info
== NQ_TARGET_IS_EMPTY_MEM
)
406 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST
);
407 else if (empty_info
== NQ_TARGET_IS_EMPTY_REC
)
408 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST_PUSH
);
411 r
= add_mem_num(reg
, saved_num_null_check
); /* NULL CHECK ID */
416 #ifdef USE_SUBEXP_CALL
418 compile_call(CallNode
* node
, regex_t
* reg
)
422 r
= add_opcode(reg
, OP_CALL
);
424 r
= unset_addr_list_add(node
->unset_addr_list
, BBUF_GET_OFFSET_POS(reg
),
427 r
= add_abs_addr(reg
, 0 /*dummy addr.*/);
433 compile_tree_n_times(Node
* node
, int n
, regex_t
* reg
)
437 for (i
= 0; i
< n
; i
++) {
438 r
= compile_tree(node
, reg
);
445 add_compile_string_length(UChar
* s ARG_UNUSED
, int mb_len
, OnigDistance byte_len
,
446 regex_t
* reg ARG_UNUSED
, int ignore_case
)
449 int op
= select_str_opcode(mb_len
, byte_len
, ignore_case
);
453 if (op
== OP_EXACTMBN
) len
+= SIZE_LENGTH
;
454 if (IS_NEED_STR_LEN_OP_EXACT(op
))
457 len
+= (int )byte_len
;
462 add_compile_string(UChar
* s
, int mb_len
, OnigDistance byte_len
,
463 regex_t
* reg
, int ignore_case
)
465 int op
= select_str_opcode(mb_len
, byte_len
, ignore_case
);
468 if (op
== OP_EXACTMBN
)
469 add_length(reg
, mb_len
);
471 if (IS_NEED_STR_LEN_OP_EXACT(op
)) {
472 if (op
== OP_EXACTN_IC
)
473 add_length(reg
, byte_len
);
475 add_length(reg
, byte_len
/ mb_len
);
478 add_bytes(reg
, s
, byte_len
);
484 compile_length_string_node(Node
* node
, regex_t
* reg
)
486 int rlen
, r
, len
, prev_len
, blen
, ambig
;
487 OnigEncoding enc
= reg
->enc
;
492 if (sn
->end
<= sn
->s
)
495 ambig
= NSTRING_IS_AMBIG(node
);
498 prev_len
= enclen(enc
, p
, sn
->end
);
503 for (; p
< sn
->end
; ) {
504 len
= enclen(enc
, p
, sn
->end
);
505 if (len
== prev_len
|| ambig
) {
509 r
= add_compile_string_length(prev
, prev_len
, blen
, reg
, ambig
);
517 r
= add_compile_string_length(prev
, prev_len
, blen
, reg
, ambig
);
523 compile_length_string_raw_node(StrNode
* sn
, regex_t
* reg
)
525 if (sn
->end
<= sn
->s
)
528 return add_compile_string_length(sn
->s
, 1 /* sb */, sn
->end
- sn
->s
, reg
, 0);
532 compile_string_node(Node
* node
, regex_t
* reg
)
534 int r
, len
, prev_len
, blen
, ambig
;
535 OnigEncoding enc
= reg
->enc
;
536 UChar
*p
, *prev
, *end
;
540 if (sn
->end
<= sn
->s
)
544 ambig
= NSTRING_IS_AMBIG(node
);
547 prev_len
= enclen(enc
, p
, end
);
552 len
= enclen(enc
, p
, end
);
553 if (len
== prev_len
|| ambig
) {
557 r
= add_compile_string(prev
, prev_len
, blen
, reg
, ambig
);
567 return add_compile_string(prev
, prev_len
, blen
, reg
, ambig
);
571 compile_string_raw_node(StrNode
* sn
, regex_t
* reg
)
573 if (sn
->end
<= sn
->s
)
576 return add_compile_string(sn
->s
, 1 /* sb */, sn
->end
- sn
->s
, reg
, 0);
580 add_multi_byte_cclass(BBuf
* mbuf
, regex_t
* reg
)
582 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg
, mbuf
->used
);
584 return add_bytes(reg
, mbuf
->p
, mbuf
->used
);
587 UChar
* p
= BBUF_GET_ADD_ADDRESS(reg
) + SIZE_LENGTH
;
589 GET_ALIGNMENT_PAD_SIZE(p
, pad_size
);
590 add_length(reg
, mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1));
591 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
593 r
= add_bytes(reg
, mbuf
->p
, mbuf
->used
);
595 /* padding for return value from compile_length_cclass_node() to be fix. */
596 pad_size
= (WORD_ALIGNMENT_SIZE
- 1) - pad_size
;
597 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
603 compile_length_cclass_node(CClassNode
* cc
, regex_t
* reg
)
607 if (IS_NULL(cc
->mbuf
)) {
608 len
= SIZE_OPCODE
+ SIZE_BITSET
;
611 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
615 len
= SIZE_OPCODE
+ SIZE_BITSET
;
617 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
;
620 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1);
628 compile_cclass_node(CClassNode
* cc
, regex_t
* reg
)
632 if (IS_NULL(cc
->mbuf
)) {
633 if (IS_NCCLASS_NOT(cc
))
634 add_opcode(reg
, OP_CCLASS_NOT
);
636 add_opcode(reg
, OP_CCLASS
);
638 r
= add_bitset(reg
, cc
->bs
);
641 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
642 if (IS_NCCLASS_NOT(cc
))
643 add_opcode(reg
, OP_CCLASS_MB_NOT
);
645 add_opcode(reg
, OP_CCLASS_MB
);
647 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
650 if (IS_NCCLASS_NOT(cc
))
651 add_opcode(reg
, OP_CCLASS_MIX_NOT
);
653 add_opcode(reg
, OP_CCLASS_MIX
);
655 r
= add_bitset(reg
, cc
->bs
);
657 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
665 entry_repeat_range(regex_t
* reg
, int id
, int lower
, int upper
)
667 #define REPEAT_RANGE_ALLOC 4
671 if (reg
->repeat_range_alloc
== 0) {
672 p
= (OnigRepeatRange
* )xmalloc(sizeof(OnigRepeatRange
) * REPEAT_RANGE_ALLOC
);
673 CHECK_NULL_RETURN_MEMERR(p
);
674 reg
->repeat_range
= p
;
675 reg
->repeat_range_alloc
= REPEAT_RANGE_ALLOC
;
677 else if (reg
->repeat_range_alloc
<= id
) {
679 n
= reg
->repeat_range_alloc
+ REPEAT_RANGE_ALLOC
;
680 p
= (OnigRepeatRange
* )xrealloc(reg
->repeat_range
,
681 sizeof(OnigRepeatRange
) * n
);
682 CHECK_NULL_RETURN_MEMERR(p
);
683 reg
->repeat_range
= p
;
684 reg
->repeat_range_alloc
= n
;
687 p
= reg
->repeat_range
;
691 p
[id
].upper
= (IS_REPEAT_INFINITE(upper
) ? 0x7fffffff : upper
);
696 compile_range_repeat_node(QtfrNode
* qn
, int target_len
, int empty_info
,
700 int num_repeat
= reg
->num_repeat
;
702 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT
: OP_REPEAT_NG
);
704 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
707 r
= add_rel_addr(reg
, target_len
+ SIZE_OP_REPEAT_INC
);
710 r
= entry_repeat_range(reg
, num_repeat
, qn
->lower
, qn
->upper
);
713 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
717 #ifdef USE_SUBEXP_CALL
720 IS_QUANTIFIER_IN_REPEAT(qn
)) {
721 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC_SG
: OP_REPEAT_INC_NG_SG
);
724 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC
: OP_REPEAT_INC_NG
);
727 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
732 is_anychar_star_quantifier(QtfrNode
* qn
)
734 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
) &&
735 NTYPE(qn
->target
) == NT_CANY
)
741 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
742 #define CKN_ON (ckn > 0)
744 #ifdef USE_COMBINATION_EXPLOSION_CHECK
747 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
749 int len
, mod_tlen
, cklen
;
751 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
752 int empty_info
= qn
->target_empty_info
;
753 int tlen
= compile_length_tree(qn
->target
, reg
);
755 if (tlen
< 0) return tlen
;
757 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
759 cklen
= (CKN_ON
? SIZE_STATE_CHECK_NUM
: 0);
762 if (NTYPE(qn
->target
) == NT_CANY
) {
763 if (qn
->greedy
&& infinite
) {
764 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
+ cklen
;
767 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
+ cklen
;
772 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
776 if (infinite
&& qn
->lower
<= 1) {
783 len
+= SIZE_OP_PUSH
+ cklen
+ mod_tlen
+ SIZE_OP_JUMP
;
791 len
+= mod_tlen
+ SIZE_OP_PUSH
+ cklen
;
794 else if (qn
->upper
== 0) {
795 if (qn
->is_referred
!= 0) /* /(?<n>..){0}/ */
796 len
= SIZE_OP_JUMP
+ tlen
;
800 else if (qn
->upper
== 1 && qn
->greedy
) {
801 if (qn
->lower
== 0) {
803 len
= SIZE_OP_STATE_CHECK_PUSH
+ tlen
;
806 len
= SIZE_OP_PUSH
+ tlen
;
813 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
814 len
= SIZE_OP_PUSH
+ cklen
+ SIZE_OP_JUMP
+ tlen
;
817 len
= SIZE_OP_REPEAT_INC
818 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
820 len
+= SIZE_OP_STATE_CHECK
;
827 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
831 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
832 int empty_info
= qn
->target_empty_info
;
833 int tlen
= compile_length_tree(qn
->target
, reg
);
835 if (tlen
< 0) return tlen
;
837 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
839 if (is_anychar_star_quantifier(qn
)) {
840 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
842 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
) {
843 if (IS_MULTILINE(reg
->options
))
844 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
846 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
849 r
= add_state_check_num(reg
, ckn
);
853 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
856 if (IS_MULTILINE(reg
->options
)) {
857 r
= add_opcode(reg
, (CKN_ON
?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR
));
862 r
= add_opcode(reg
, (CKN_ON
?
863 OP_STATE_CHECK_ANYCHAR_STAR
868 r
= add_state_check_num(reg
, ckn
);
875 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
879 if (infinite
&& qn
->lower
<= 1) {
881 if (qn
->lower
== 1) {
882 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
883 (CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
));
888 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
890 r
= add_state_check_num(reg
, ckn
);
892 r
= add_rel_addr(reg
, mod_tlen
+ SIZE_OP_JUMP
);
895 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
898 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
900 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
901 -(mod_tlen
+ (int )SIZE_OP_JUMP
902 + (int )(CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
)));
905 if (qn
->lower
== 0) {
906 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
909 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
912 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH_OR_JUMP
);
914 r
= add_state_check_num(reg
, ckn
);
916 r
= add_rel_addr(reg
,
917 -(mod_tlen
+ (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP
));
920 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
923 else if (qn
->upper
== 0) {
924 if (qn
->is_referred
!= 0) { /* /(?<n>..){0}/ */
925 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
927 r
= compile_tree(qn
->target
, reg
);
932 else if (qn
->upper
== 1 && qn
->greedy
) {
933 if (qn
->lower
== 0) {
935 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
937 r
= add_state_check_num(reg
, ckn
);
939 r
= add_rel_addr(reg
, tlen
);
942 r
= add_opcode_rel_addr(reg
, OP_PUSH
, tlen
);
947 r
= compile_tree(qn
->target
, reg
);
949 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
951 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
953 r
= add_state_check_num(reg
, ckn
);
955 r
= add_rel_addr(reg
, SIZE_OP_JUMP
);
958 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
962 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
964 r
= compile_tree(qn
->target
, reg
);
967 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
970 r
= add_opcode(reg
, OP_STATE_CHECK
);
972 r
= add_state_check_num(reg
, ckn
);
978 #else /* USE_COMBINATION_EXPLOSION_CHECK */
981 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
984 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
985 int empty_info
= qn
->target_empty_info
;
986 int tlen
= compile_length_tree(qn
->target
, reg
);
988 if (tlen
< 0) return tlen
;
991 if (NTYPE(qn
->target
) == NT_CANY
) {
992 if (qn
->greedy
&& infinite
) {
993 if (IS_NOT_NULL(qn
->next_head_exact
))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
;
996 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
;
1000 if (empty_info
!= 0)
1001 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1006 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1007 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1011 len
= tlen
* qn
->lower
;
1015 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn
->head_exact
))
1017 len
+= SIZE_OP_PUSH_OR_JUMP_EXACT1
+ mod_tlen
+ SIZE_OP_JUMP
;
1020 if (IS_NOT_NULL(qn
->next_head_exact
))
1021 len
+= SIZE_OP_PUSH_IF_PEEK_NEXT
+ mod_tlen
+ SIZE_OP_JUMP
;
1023 len
+= SIZE_OP_PUSH
+ mod_tlen
+ SIZE_OP_JUMP
;
1026 len
+= SIZE_OP_JUMP
+ mod_tlen
+ SIZE_OP_PUSH
;
1028 else if (qn
->upper
== 0 && qn
->is_referred
!= 0) { /* /(?<n>..){0}/ */
1029 len
= SIZE_OP_JUMP
+ tlen
;
1031 else if (!infinite
&& qn
->greedy
&&
1032 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1034 len
= tlen
* qn
->lower
;
1035 len
+= (SIZE_OP_PUSH
+ tlen
) * (qn
->upper
- qn
->lower
);
1037 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1038 len
= SIZE_OP_PUSH
+ SIZE_OP_JUMP
+ tlen
;
1041 len
= SIZE_OP_REPEAT_INC
1042 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
1049 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
1052 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
1053 int empty_info
= qn
->target_empty_info
;
1054 int tlen
= compile_length_tree(qn
->target
, reg
);
1056 if (tlen
< 0) return tlen
;
1058 if (is_anychar_star_quantifier(qn
)) {
1059 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1061 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1062 if (IS_MULTILINE(reg
->options
))
1063 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1065 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
1067 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1070 if (IS_MULTILINE(reg
->options
))
1071 return add_opcode(reg
, OP_ANYCHAR_ML_STAR
);
1073 return add_opcode(reg
, OP_ANYCHAR_STAR
);
1077 if (empty_info
!= 0)
1078 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1083 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1084 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1086 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn
->head_exact
))
1088 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_OR_JUMP_EXACT1
);
1091 if (IS_NOT_NULL(qn
->next_head_exact
))
1092 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_IF_PEEK_NEXT
);
1094 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH
);
1097 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_JUMP
);
1102 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1107 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn
->head_exact
)) {
1109 r
= add_opcode_rel_addr(reg
, OP_PUSH_OR_JUMP_EXACT1
,
1110 mod_tlen
+ SIZE_OP_JUMP
);
1112 add_bytes(reg
, NSTR(qn
->head_exact
)->s
, 1);
1113 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1115 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1116 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_OR_JUMP_EXACT1
));
1120 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1121 r
= add_opcode_rel_addr(reg
, OP_PUSH_IF_PEEK_NEXT
,
1122 mod_tlen
+ SIZE_OP_JUMP
);
1124 add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1125 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1127 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1128 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_IF_PEEK_NEXT
));
1131 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
1133 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1135 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1136 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH
));
1140 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
1142 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1144 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
1147 else if (qn
->upper
== 0 && qn
->is_referred
!= 0) { /* /(?<n>..){0}/ */
1148 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1150 r
= compile_tree(qn
->target
, reg
);
1152 else if (!infinite
&& qn
->greedy
&&
1153 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1155 int n
= qn
->upper
- qn
->lower
;
1157 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1160 for (i
= 0; i
< n
; i
++) {
1161 r
= add_opcode_rel_addr(reg
, OP_PUSH
,
1162 (n
- i
) * tlen
+ (n
- i
- 1) * SIZE_OP_PUSH
);
1164 r
= compile_tree(qn
->target
, reg
);
1168 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1169 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
1171 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1173 r
= compile_tree(qn
->target
, reg
);
1176 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
1180 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1183 compile_length_option_node(EncloseNode
* node
, regex_t
* reg
)
1186 OnigOptionType prev
= reg
->options
;
1188 reg
->options
= node
->option
;
1189 tlen
= compile_length_tree(node
->target
, reg
);
1190 reg
->options
= prev
;
1192 if (tlen
< 0) return tlen
;
1194 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1195 return SIZE_OP_SET_OPTION_PUSH
+ SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
1196 + tlen
+ SIZE_OP_SET_OPTION
;
1203 compile_option_node(EncloseNode
* node
, regex_t
* reg
)
1206 OnigOptionType prev
= reg
->options
;
1208 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1209 r
= add_opcode_option(reg
, OP_SET_OPTION_PUSH
, node
->option
);
1211 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1213 r
= add_opcode(reg
, OP_FAIL
);
1217 reg
->options
= node
->option
;
1218 r
= compile_tree(node
->target
, reg
);
1219 reg
->options
= prev
;
1221 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1223 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1229 compile_length_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1234 if (node
->type
== ENCLOSE_OPTION
)
1235 return compile_length_option_node(node
, reg
);
1238 tlen
= compile_length_tree(node
->target
, reg
);
1239 if (tlen
< 0) return tlen
;
1244 switch (node
->type
) {
1245 case ENCLOSE_MEMORY
:
1246 #ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node
)) {
1248 len
= SIZE_OP_MEMORY_START_PUSH
+ tlen
1249 + SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1250 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1251 len
+= (IS_ENCLOSE_RECURSION(node
)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1254 len
+= (IS_ENCLOSE_RECURSION(node
)
1255 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1257 else if (IS_ENCLOSE_RECURSION(node
)) {
1258 len
= SIZE_OP_MEMORY_START_PUSH
;
1259 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_REC
);
1265 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1266 len
= SIZE_OP_MEMORY_START_PUSH
;
1268 len
= SIZE_OP_MEMORY_START
;
1270 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1271 ? SIZE_OP_MEMORY_END_PUSH
: SIZE_OP_MEMORY_END
);
1275 case ENCLOSE_STOP_BACKTRACK
:
1276 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1277 /* optimization because the match cache optimization pushes an extra item to */
1278 /* the stack and it breaks the assumption for this optimization. */
1279 #ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1281 QtfrNode
* qn
= NQTFR(node
->target
);
1282 tlen
= compile_length_tree(qn
->target
, reg
);
1283 if (tlen
< 0) return tlen
;
1285 len
= tlen
* qn
->lower
1286 + SIZE_OP_PUSH
+ tlen
+ SIZE_OP_POP
+ SIZE_OP_JUMP
;
1290 len
= SIZE_OP_PUSH_STOP_BT
+ tlen
+ SIZE_OP_POP_STOP_BT
;
1291 #ifndef USE_MATCH_CACHE
1296 case ENCLOSE_CONDITION
:
1297 len
= SIZE_OP_CONDITION
;
1298 if (NTYPE(node
->target
) == NT_ALT
) {
1299 Node
* x
= node
->target
;
1301 tlen
= compile_length_tree(NCAR(x
), reg
); /* yes-node */
1302 if (tlen
< 0) return tlen
;
1303 len
+= tlen
+ SIZE_OP_JUMP
;
1304 if (NCDR(x
) == NULL
) return ONIGERR_PARSER_BUG
;
1306 tlen
= compile_length_tree(NCAR(x
), reg
); /* no-node */
1307 if (tlen
< 0) return tlen
;
1309 if (NCDR(x
) != NULL
) return ONIGERR_INVALID_CONDITION_PATTERN
;
1312 return ONIGERR_PARSER_BUG
;
1316 case ENCLOSE_ABSENT
:
1317 len
= SIZE_OP_PUSH_ABSENT_POS
+ SIZE_OP_ABSENT
+ tlen
+ SIZE_OP_ABSENT_END
;
1321 return ONIGERR_TYPE_BUG
;
1328 static int get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
);
1331 compile_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1335 if (node
->type
== ENCLOSE_OPTION
)
1336 return compile_option_node(node
, reg
);
1338 switch (node
->type
) {
1339 case ENCLOSE_MEMORY
:
1340 #ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node
)) {
1342 r
= add_opcode(reg
, OP_CALL
);
1344 node
->call_addr
= BBUF_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1345 node
->state
|= NST_ADDR_FIXED
;
1346 r
= add_abs_addr(reg
, (int )node
->call_addr
);
1348 len
= compile_length_tree(node
->target
, reg
);
1349 len
+= (SIZE_OP_MEMORY_START_PUSH
+ SIZE_OP_RETURN
);
1350 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1351 len
+= (IS_ENCLOSE_RECURSION(node
)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1354 len
+= (IS_ENCLOSE_RECURSION(node
)
1355 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1357 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1361 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1362 r
= add_opcode(reg
, OP_MEMORY_START_PUSH
);
1364 r
= add_opcode(reg
, OP_MEMORY_START
);
1366 r
= add_mem_num(reg
, node
->regnum
);
1368 r
= compile_tree(node
->target
, reg
);
1370 #ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node
)) {
1372 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1373 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1374 ? OP_MEMORY_END_PUSH_REC
: OP_MEMORY_END_PUSH
));
1376 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1377 ? OP_MEMORY_END_REC
: OP_MEMORY_END
));
1380 r
= add_mem_num(reg
, node
->regnum
);
1382 r
= add_opcode(reg
, OP_RETURN
);
1384 else if (IS_ENCLOSE_RECURSION(node
)) {
1385 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1386 r
= add_opcode(reg
, OP_MEMORY_END_PUSH_REC
);
1388 r
= add_opcode(reg
, OP_MEMORY_END_REC
);
1390 r
= add_mem_num(reg
, node
->regnum
);
1395 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1396 r
= add_opcode(reg
, OP_MEMORY_END_PUSH
);
1398 r
= add_opcode(reg
, OP_MEMORY_END
);
1400 r
= add_mem_num(reg
, node
->regnum
);
1404 case ENCLOSE_STOP_BACKTRACK
:
1405 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1406 /* optimization because the match cache optimization pushes an extra item to */
1407 /* the stack and it breaks the assumption for this optimization. */
1408 #ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1410 QtfrNode
* qn
= NQTFR(node
->target
);
1411 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1414 len
= compile_length_tree(qn
->target
, reg
);
1415 if (len
< 0) return len
;
1417 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_POP
+ SIZE_OP_JUMP
);
1419 r
= compile_tree(qn
->target
, reg
);
1421 r
= add_opcode(reg
, OP_POP
);
1423 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1424 -((int )SIZE_OP_PUSH
+ len
+ (int )SIZE_OP_POP
+ (int )SIZE_OP_JUMP
));
1428 r
= add_opcode(reg
, OP_PUSH_STOP_BT
);
1430 r
= compile_tree(node
->target
, reg
);
1432 r
= add_opcode(reg
, OP_POP_STOP_BT
);
1433 #ifndef USE_MATCH_CACHE
1438 case ENCLOSE_CONDITION
:
1439 r
= add_opcode(reg
, OP_CONDITION
);
1441 r
= add_mem_num(reg
, node
->regnum
);
1444 if (NTYPE(node
->target
) == NT_ALT
) {
1445 Node
* x
= node
->target
;
1448 len
= compile_length_tree(NCAR(x
), reg
); /* yes-node */
1449 if (len
< 0) return len
;
1450 if (NCDR(x
) == NULL
) return ONIGERR_PARSER_BUG
;
1452 len2
= compile_length_tree(NCAR(x
), reg
); /* no-node */
1453 if (len2
< 0) return len2
;
1454 if (NCDR(x
) != NULL
) return ONIGERR_INVALID_CONDITION_PATTERN
;
1457 r
= add_rel_addr(reg
, len
+ SIZE_OP_JUMP
);
1459 r
= compile_tree(NCAR(x
), reg
); /* yes-node */
1461 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len2
);
1464 r
= compile_tree(NCAR(x
), reg
); /* no-node */
1467 return ONIGERR_PARSER_BUG
;
1471 case ENCLOSE_ABSENT
:
1472 len
= compile_length_tree(node
->target
, reg
);
1473 if (len
< 0) return len
;
1475 r
= add_opcode(reg
, OP_PUSH_ABSENT_POS
);
1477 r
= add_opcode_rel_addr(reg
, OP_ABSENT
, len
+ SIZE_OP_ABSENT_END
);
1479 r
= compile_tree(node
->target
, reg
);
1481 r
= add_opcode(reg
, OP_ABSENT_END
);
1485 return ONIGERR_TYPE_BUG
;
1493 compile_length_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1499 tlen
= compile_length_tree(node
->target
, reg
);
1500 if (tlen
< 0) return tlen
;
1503 switch (node
->type
) {
1504 case ANCHOR_PREC_READ
:
1505 len
= SIZE_OP_PUSH_POS
+ tlen
+ SIZE_OP_POP_POS
;
1507 case ANCHOR_PREC_READ_NOT
:
1508 len
= SIZE_OP_PUSH_POS_NOT
+ tlen
+ SIZE_OP_FAIL_POS
;
1510 case ANCHOR_LOOK_BEHIND
:
1511 len
= SIZE_OP_LOOK_BEHIND
+ tlen
;
1513 case ANCHOR_LOOK_BEHIND_NOT
:
1514 len
= SIZE_OP_PUSH_LOOK_BEHIND_NOT
+ tlen
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
;
1526 compile_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1530 switch (node
->type
) {
1531 case ANCHOR_BEGIN_BUF
: r
= add_opcode(reg
, OP_BEGIN_BUF
); break;
1532 case ANCHOR_END_BUF
: r
= add_opcode(reg
, OP_END_BUF
); break;
1533 case ANCHOR_BEGIN_LINE
: r
= add_opcode(reg
, OP_BEGIN_LINE
); break;
1534 case ANCHOR_END_LINE
: r
= add_opcode(reg
, OP_END_LINE
); break;
1535 case ANCHOR_SEMI_END_BUF
: r
= add_opcode(reg
, OP_SEMI_END_BUF
); break;
1536 case ANCHOR_BEGIN_POSITION
: r
= add_opcode(reg
, OP_BEGIN_POSITION
); break;
1538 case ANCHOR_WORD_BOUND
:
1539 if (node
->ascii_range
) r
= add_opcode(reg
, OP_ASCII_WORD_BOUND
);
1540 else r
= add_opcode(reg
, OP_WORD_BOUND
);
1542 case ANCHOR_NOT_WORD_BOUND
:
1543 if (node
->ascii_range
) r
= add_opcode(reg
, OP_NOT_ASCII_WORD_BOUND
);
1544 else r
= add_opcode(reg
, OP_NOT_WORD_BOUND
);
1546 #ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN
:
1548 if (node
->ascii_range
) r
= add_opcode(reg
, OP_ASCII_WORD_BEGIN
);
1549 else r
= add_opcode(reg
, OP_WORD_BEGIN
);
1551 case ANCHOR_WORD_END
:
1552 if (node
->ascii_range
) r
= add_opcode(reg
, OP_ASCII_WORD_END
);
1553 else r
= add_opcode(reg
, OP_WORD_END
);
1556 case ANCHOR_KEEP
: r
= add_opcode(reg
, OP_KEEP
); break;
1558 case ANCHOR_PREC_READ
:
1559 r
= add_opcode(reg
, OP_PUSH_POS
);
1561 r
= compile_tree(node
->target
, reg
);
1563 r
= add_opcode(reg
, OP_POP_POS
);
1566 case ANCHOR_PREC_READ_NOT
:
1567 len
= compile_length_tree(node
->target
, reg
);
1568 if (len
< 0) return len
;
1569 r
= add_opcode_rel_addr(reg
, OP_PUSH_POS_NOT
, len
+ SIZE_OP_FAIL_POS
);
1571 r
= compile_tree(node
->target
, reg
);
1573 r
= add_opcode(reg
, OP_FAIL_POS
);
1576 case ANCHOR_LOOK_BEHIND
:
1579 r
= add_opcode(reg
, OP_LOOK_BEHIND
);
1581 if (node
->char_len
< 0) {
1582 r
= get_char_length_tree(node
->target
, reg
, &n
);
1583 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1587 r
= add_length(reg
, n
);
1589 r
= compile_tree(node
->target
, reg
);
1593 case ANCHOR_LOOK_BEHIND_NOT
:
1596 len
= compile_length_tree(node
->target
, reg
);
1597 r
= add_opcode_rel_addr(reg
, OP_PUSH_LOOK_BEHIND_NOT
,
1598 len
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
);
1600 if (node
->char_len
< 0) {
1601 r
= get_char_length_tree(node
->target
, reg
, &n
);
1602 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1606 r
= add_length(reg
, n
);
1608 r
= compile_tree(node
->target
, reg
);
1610 r
= add_opcode(reg
, OP_FAIL_LOOK_BEHIND_NOT
);
1615 return ONIGERR_TYPE_BUG
;
1623 compile_length_tree(Node
* node
, regex_t
* reg
)
1632 r
= compile_length_tree(NCAR(node
), reg
);
1633 if (r
< 0) return r
;
1635 } while (IS_NOT_NULL(node
= NCDR(node
)));
1644 r
= compile_length_tree(NCAR(node
), reg
);
1645 if (r
< 0) return r
;
1648 } while (IS_NOT_NULL(node
= NCDR(node
)));
1650 r
+= (SIZE_OP_PUSH
+ SIZE_OP_JUMP
) * (n
- 1);
1655 if (NSTRING_IS_RAW(node
))
1656 r
= compile_length_string_raw_node(NSTR(node
), reg
);
1658 r
= compile_length_string_node(node
, reg
);
1662 r
= compile_length_cclass_node(NCCLASS(node
), reg
);
1672 BRefNode
* br
= NBREF(node
);
1674 #ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br
)) {
1676 r
= SIZE_OPCODE
+ SIZE_OPTION
+ SIZE_LENGTH
+
1677 SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1681 if (br
->back_num
== 1) {
1682 r
= ((!IS_IGNORECASE(reg
->options
) && br
->back_static
[0] <= 2)
1683 ? SIZE_OPCODE
: (SIZE_OPCODE
+ SIZE_MEMNUM
));
1686 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1691 #ifdef USE_SUBEXP_CALL
1698 r
= compile_length_quantifier_node(NQTFR(node
), reg
);
1702 r
= compile_length_enclose_node(NENCLOSE(node
), reg
);
1706 r
= compile_length_anchor_node(NANCHOR(node
), reg
);
1710 return ONIGERR_TYPE_BUG
;
1718 compile_tree(Node
* node
, regex_t
* reg
)
1720 int n
, type
, len
, pos
, r
= 0;
1726 r
= compile_tree(NCAR(node
), reg
);
1727 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1735 len
+= compile_length_tree(NCAR(x
), reg
);
1736 if (NCDR(x
) != NULL
) {
1737 len
+= SIZE_OP_PUSH
+ SIZE_OP_JUMP
;
1739 } while (IS_NOT_NULL(x
= NCDR(x
)));
1740 pos
= reg
->used
+ len
; /* goal position */
1743 len
= compile_length_tree(NCAR(node
), reg
);
1744 if (IS_NOT_NULL(NCDR(node
))) {
1745 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_JUMP
);
1748 r
= compile_tree(NCAR(node
), reg
);
1750 if (IS_NOT_NULL(NCDR(node
))) {
1751 len
= pos
- (reg
->used
+ SIZE_OP_JUMP
);
1752 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1755 } while (IS_NOT_NULL(node
= NCDR(node
)));
1760 if (NSTRING_IS_RAW(node
))
1761 r
= compile_string_raw_node(NSTR(node
), reg
);
1763 r
= compile_string_node(node
, reg
);
1767 r
= compile_cclass_node(NCCLASS(node
), reg
);
1774 switch (NCTYPE(node
)->ctype
) {
1775 case ONIGENC_CTYPE_WORD
:
1776 if (NCTYPE(node
)->ascii_range
!= 0) {
1777 if (NCTYPE(node
)->not != 0) op
= OP_NOT_ASCII_WORD
;
1778 else op
= OP_ASCII_WORD
;
1781 if (NCTYPE(node
)->not != 0) op
= OP_NOT_WORD
;
1786 return ONIGERR_TYPE_BUG
;
1789 r
= add_opcode(reg
, op
);
1794 if (IS_MULTILINE(reg
->options
))
1795 r
= add_opcode(reg
, OP_ANYCHAR_ML
);
1797 r
= add_opcode(reg
, OP_ANYCHAR
);
1802 BRefNode
* br
= NBREF(node
);
1804 #ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br
)) {
1806 r
= add_opcode(reg
, OP_BACKREF_WITH_LEVEL
);
1808 r
= add_option(reg
, (reg
->options
& ONIG_OPTION_IGNORECASE
));
1810 r
= add_length(reg
, br
->nest_level
);
1813 goto add_bacref_mems
;
1817 if (br
->back_num
== 1) {
1818 n
= br
->back_static
[0];
1819 if (IS_IGNORECASE(reg
->options
)) {
1820 r
= add_opcode(reg
, OP_BACKREFN_IC
);
1822 r
= add_mem_num(reg
, n
);
1826 case 1: r
= add_opcode(reg
, OP_BACKREF1
); break;
1827 case 2: r
= add_opcode(reg
, OP_BACKREF2
); break;
1829 r
= add_opcode(reg
, OP_BACKREFN
);
1831 r
= add_mem_num(reg
, n
);
1840 if (IS_IGNORECASE(reg
->options
)) {
1841 r
= add_opcode(reg
, OP_BACKREF_MULTI_IC
);
1844 r
= add_opcode(reg
, OP_BACKREF_MULTI
);
1848 #ifdef USE_BACKREF_WITH_LEVEL
1851 r
= add_length(reg
, br
->back_num
);
1854 for (i
= br
->back_num
- 1; i
>= 0; i
--) {
1855 r
= add_mem_num(reg
, p
[i
]);
1862 #ifdef USE_SUBEXP_CALL
1864 r
= compile_call(NCALL(node
), reg
);
1869 r
= compile_quantifier_node(NQTFR(node
), reg
);
1873 r
= compile_enclose_node(NENCLOSE(node
), reg
);
1877 r
= compile_anchor_node(NANCHOR(node
), reg
);
1882 fprintf(stderr
, "compile_tree: undefined node type %d\n", NTYPE(node
));
1890 #ifdef USE_NAMED_GROUP
1893 noname_disable_map(Node
** plink
, GroupNumRemap
* map
, int* counter
)
1896 Node
* node
= *plink
;
1898 switch (NTYPE(node
)) {
1902 r
= noname_disable_map(&(NCAR(node
)), map
, counter
);
1903 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1908 Node
** ptarget
= &(NQTFR(node
)->target
);
1909 Node
* old
= *ptarget
;
1910 r
= noname_disable_map(ptarget
, map
, counter
);
1911 if (*ptarget
!= old
&& NTYPE(*ptarget
) == NT_QTFR
) {
1912 onig_reduce_nested_quantifier(node
, *ptarget
);
1919 EncloseNode
* en
= NENCLOSE(node
);
1920 if (en
->type
== ENCLOSE_MEMORY
) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en
)) {
1923 map
[en
->regnum
].new_val
= *counter
;
1924 en
->regnum
= *counter
;
1926 else if (en
->regnum
!= 0) {
1927 *plink
= en
->target
;
1928 en
->target
= NULL_NODE
;
1929 onig_node_free(node
);
1930 r
= noname_disable_map(plink
, map
, counter
);
1934 r
= noname_disable_map(&(en
->target
), map
, counter
);
1939 if (NANCHOR(node
)->target
)
1940 r
= noname_disable_map(&(NANCHOR(node
)->target
), map
, counter
);
1951 renumber_node_backref(Node
* node
, GroupNumRemap
* map
, const int num_mem
)
1953 int i
, pos
, n
, old_num
;
1955 BRefNode
* bn
= NBREF(node
);
1957 if (! IS_BACKREF_NAME_REF(bn
))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1960 old_num
= bn
->back_num
;
1961 if (IS_NULL(bn
->back_dynamic
))
1962 backs
= bn
->back_static
;
1964 backs
= bn
->back_dynamic
;
1966 for (i
= 0, pos
= 0; i
< old_num
; i
++) {
1967 if (backs
[i
] > num_mem
) return ONIGERR_INVALID_BACKREF
;
1968 n
= map
[backs
[i
]].new_val
;
1980 renumber_by_map(Node
* node
, GroupNumRemap
* map
, const int num_mem
)
1984 switch (NTYPE(node
)) {
1988 r
= renumber_by_map(NCAR(node
), map
, num_mem
);
1989 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1992 r
= renumber_by_map(NQTFR(node
)->target
, map
, num_mem
);
1996 EncloseNode
* en
= NENCLOSE(node
);
1997 if (en
->type
== ENCLOSE_CONDITION
) {
1998 if (en
->regnum
> num_mem
) return ONIGERR_INVALID_BACKREF
;
1999 en
->regnum
= map
[en
->regnum
].new_val
;
2001 r
= renumber_by_map(en
->target
, map
, num_mem
);
2006 r
= renumber_node_backref(node
, map
, num_mem
);
2010 if (NANCHOR(node
)->target
)
2011 r
= renumber_by_map(NANCHOR(node
)->target
, map
, num_mem
);
2022 numbered_ref_check(Node
* node
)
2026 switch (NTYPE(node
)) {
2030 r
= numbered_ref_check(NCAR(node
));
2031 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2034 r
= numbered_ref_check(NQTFR(node
)->target
);
2037 r
= numbered_ref_check(NENCLOSE(node
)->target
);
2041 if (! IS_BACKREF_NAME_REF(NBREF(node
)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
2046 if (NANCHOR(node
)->target
)
2047 r
= numbered_ref_check(NANCHOR(node
)->target
);
2058 disable_noname_group_capture(Node
** root
, regex_t
* reg
, ScanEnv
* env
)
2060 int r
, i
, pos
, counter
;
2064 map
= (GroupNumRemap
* )xalloca(sizeof(GroupNumRemap
) * (env
->num_mem
+ 1));
2065 CHECK_NULL_RETURN_MEMERR(map
);
2066 for (i
= 1; i
<= env
->num_mem
; i
++) {
2070 r
= noname_disable_map(root
, map
, &counter
);
2071 if (r
!= 0) return r
;
2073 r
= renumber_by_map(*root
, map
, env
->num_mem
);
2074 if (r
!= 0) return r
;
2076 for (i
= 1, pos
= 1; i
<= env
->num_mem
; i
++) {
2077 if (map
[i
].new_val
> 0) {
2078 SCANENV_MEM_NODES(env
)[pos
] = SCANENV_MEM_NODES(env
)[i
];
2083 loc
= env
->capture_history
;
2084 BIT_STATUS_CLEAR(env
->capture_history
);
2085 for (i
= 1; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
2086 if (BIT_STATUS_AT(loc
, i
)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env
->capture_history
, map
[i
].new_val
);
2091 env
->num_mem
= env
->num_named
;
2092 reg
->num_mem
= env
->num_named
;
2094 return onig_renumber_name_table(reg
, map
);
2096 #endif /* USE_NAMED_GROUP */
2098 #ifdef USE_SUBEXP_CALL
2100 unset_addr_list_fix(UnsetAddrList
* uslist
, regex_t
* reg
)
2106 for (i
= 0; i
< uslist
->num
; i
++) {
2107 en
= NENCLOSE(uslist
->us
[i
].target
);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en
)) return ONIGERR_PARSER_BUG
;
2109 addr
= en
->call_addr
;
2110 offset
= uslist
->us
[i
].offset
;
2112 BBUF_WRITE(reg
, offset
, &addr
, SIZE_ABSADDR
);
2118 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2120 quantifiers_memory_node_info(Node
* node
)
2124 switch (NTYPE(node
)) {
2130 v
= quantifiers_memory_node_info(NCAR(node
));
2132 } while (v
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
2136 # ifdef USE_SUBEXP_CALL
2138 if (IS_CALL_RECURSION(NCALL(node
))) {
2139 return NQ_TARGET_IS_EMPTY_REC
; /* tiny version */
2142 r
= quantifiers_memory_node_info(NCALL(node
)->target
);
2148 QtfrNode
* qn
= NQTFR(node
);
2149 if (qn
->upper
!= 0) {
2150 r
= quantifiers_memory_node_info(qn
->target
);
2157 EncloseNode
* en
= NENCLOSE(node
);
2159 case ENCLOSE_MEMORY
:
2160 return NQ_TARGET_IS_EMPTY_MEM
;
2163 case ENCLOSE_OPTION
:
2164 case ENCLOSE_STOP_BACKTRACK
:
2165 case ENCLOSE_CONDITION
:
2166 case ENCLOSE_ABSENT
:
2167 r
= quantifiers_memory_node_info(en
->target
);
2187 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2190 get_min_match_length(Node
* node
, OnigDistance
*min
, ScanEnv
* env
)
2196 switch (NTYPE(node
)) {
2201 Node
** nodes
= SCANENV_MEM_NODES(env
);
2202 BRefNode
* br
= NBREF(node
);
2203 if (br
->state
& NST_RECURSION
) break;
2205 backs
= BACKREFS_P(br
);
2206 if (backs
[0] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2207 r
= get_min_match_length(nodes
[backs
[0]], min
, env
);
2209 for (i
= 1; i
< br
->back_num
; i
++) {
2210 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2211 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
);
2213 if (*min
> tmin
) *min
= tmin
;
2218 #ifdef USE_SUBEXP_CALL
2220 if (IS_CALL_RECURSION(NCALL(node
))) {
2221 EncloseNode
* en
= NENCLOSE(NCALL(node
)->target
);
2222 if (IS_ENCLOSE_MIN_FIXED(en
))
2226 r
= get_min_match_length(NCALL(node
)->target
, min
, env
);
2232 r
= get_min_match_length(NCAR(node
), &tmin
, env
);
2233 if (r
== 0) *min
+= tmin
;
2234 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2243 r
= get_min_match_length(x
, &tmin
, env
);
2245 if (y
== node
) *min
= tmin
;
2246 else if (*min
> tmin
) *min
= tmin
;
2247 } while (r
== 0 && IS_NOT_NULL(y
= NCDR(y
)));
2253 StrNode
* sn
= NSTR(node
);
2254 *min
= sn
->end
- sn
->s
;
2269 QtfrNode
* qn
= NQTFR(node
);
2271 if (qn
->lower
> 0) {
2272 r
= get_min_match_length(qn
->target
, min
, env
);
2274 *min
= distance_multiply(*min
, qn
->lower
);
2281 EncloseNode
* en
= NENCLOSE(node
);
2283 case ENCLOSE_MEMORY
:
2284 if (IS_ENCLOSE_MIN_FIXED(en
))
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2288 *min
= 0; /* recursive */
2290 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2291 r
= get_min_match_length(en
->target
, min
, env
);
2292 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2295 SET_ENCLOSE_STATUS(node
, NST_MIN_FIXED
);
2301 case ENCLOSE_OPTION
:
2302 case ENCLOSE_STOP_BACKTRACK
:
2303 case ENCLOSE_CONDITION
:
2304 r
= get_min_match_length(en
->target
, min
, env
);
2307 case ENCLOSE_ABSENT
:
2322 get_max_match_length(Node
* node
, OnigDistance
*max
, ScanEnv
* env
)
2328 switch (NTYPE(node
)) {
2331 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2333 *max
= distance_add(*max
, tmax
);
2334 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2339 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2340 if (r
== 0 && *max
< tmax
) *max
= tmax
;
2341 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2346 StrNode
* sn
= NSTR(node
);
2347 *max
= sn
->end
- sn
->s
;
2352 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2357 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2364 Node
** nodes
= SCANENV_MEM_NODES(env
);
2365 BRefNode
* br
= NBREF(node
);
2366 if (br
->state
& NST_RECURSION
) {
2367 *max
= ONIG_INFINITE_DISTANCE
;
2370 backs
= BACKREFS_P(br
);
2371 for (i
= 0; i
< br
->back_num
; i
++) {
2372 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2373 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
);
2375 if (*max
< tmax
) *max
= tmax
;
2380 #ifdef USE_SUBEXP_CALL
2382 if (! IS_CALL_RECURSION(NCALL(node
)))
2383 r
= get_max_match_length(NCALL(node
)->target
, max
, env
);
2385 *max
= ONIG_INFINITE_DISTANCE
;
2391 QtfrNode
* qn
= NQTFR(node
);
2393 if (qn
->upper
!= 0) {
2394 r
= get_max_match_length(qn
->target
, max
, env
);
2395 if (r
== 0 && *max
!= 0) {
2396 if (! IS_REPEAT_INFINITE(qn
->upper
))
2397 *max
= distance_multiply(*max
, qn
->upper
);
2399 *max
= ONIG_INFINITE_DISTANCE
;
2407 EncloseNode
* en
= NENCLOSE(node
);
2409 case ENCLOSE_MEMORY
:
2410 if (IS_ENCLOSE_MAX_FIXED(en
))
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2414 *max
= ONIG_INFINITE_DISTANCE
;
2416 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2417 r
= get_max_match_length(en
->target
, max
, env
);
2418 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2421 SET_ENCLOSE_STATUS(node
, NST_MAX_FIXED
);
2427 case ENCLOSE_OPTION
:
2428 case ENCLOSE_STOP_BACKTRACK
:
2429 case ENCLOSE_CONDITION
:
2430 r
= get_max_match_length(en
->target
, max
, env
);
2433 case ENCLOSE_ABSENT
:
2447 #define GET_CHAR_LEN_VARLEN -1
2448 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2450 /* fixed size pattern node only */
2452 get_char_length_tree1(Node
* node
, regex_t
* reg
, int* len
, int level
)
2459 switch (NTYPE(node
)) {
2462 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2464 *len
= (int )distance_add(*len
, tlen
);
2465 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2473 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2474 while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
))) {
2475 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen2
, level
);
2484 r
= GET_CHAR_LEN_TOP_ALT_VARLEN
;
2486 r
= GET_CHAR_LEN_VARLEN
;
2496 StrNode
* sn
= NSTR(node
);
2498 while (s
< sn
->end
) {
2499 s
+= enclen(reg
->enc
, s
, sn
->end
);
2507 QtfrNode
* qn
= NQTFR(node
);
2508 if (qn
->lower
== qn
->upper
) {
2509 r
= get_char_length_tree1(qn
->target
, reg
, &tlen
, level
);
2511 *len
= (int )distance_multiply(tlen
, qn
->lower
);
2514 r
= GET_CHAR_LEN_VARLEN
;
2518 #ifdef USE_SUBEXP_CALL
2520 if (! IS_CALL_RECURSION(NCALL(node
)))
2521 r
= get_char_length_tree1(NCALL(node
)->target
, reg
, len
, level
);
2523 r
= GET_CHAR_LEN_VARLEN
;
2538 EncloseNode
* en
= NENCLOSE(node
);
2540 case ENCLOSE_MEMORY
:
2541 #ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en
))
2543 *len
= en
->char_len
;
2545 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2547 en
->char_len
= *len
;
2548 SET_ENCLOSE_STATUS(node
, NST_CLEN_FIXED
);
2553 case ENCLOSE_OPTION
:
2554 case ENCLOSE_STOP_BACKTRACK
:
2555 case ENCLOSE_CONDITION
:
2556 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2558 case ENCLOSE_ABSENT
:
2569 r
= GET_CHAR_LEN_VARLEN
;
2577 get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
)
2579 return get_char_length_tree1(node
, reg
, len
, 0);
2582 /* x is not included y ==> 1 : 0 */
2584 is_not_included(Node
* x
, Node
* y
, regex_t
* reg
)
2599 if (NCTYPE(y
)->ctype
== NCTYPE(x
)->ctype
&&
2600 NCTYPE(y
)->not != NCTYPE(x
)->not &&
2601 NCTYPE(y
)->ascii_range
== NCTYPE(x
)->ascii_range
)
2611 tmp
= x
; x
= y
; y
= tmp
;
2628 CClassNode
* xc
= NCCLASS(x
);
2631 switch (NCTYPE(y
)->ctype
) {
2632 case ONIGENC_CTYPE_WORD
:
2633 if (NCTYPE(y
)->not == 0) {
2634 if (IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) {
2635 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2636 if (BITSET_AT(xc
->bs
, i
)) {
2637 if (NCTYPE(y
)->ascii_range
) {
2638 if (IS_CODE_SB_WORD(reg
->enc
, i
)) return 0;
2641 if (ONIGENC_IS_CODE_WORD(reg
->enc
, i
)) return 0;
2650 if (IS_NOT_NULL(xc
->mbuf
)) return 0;
2651 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2653 if (NCTYPE(y
)->ascii_range
)
2654 is_word
= IS_CODE_SB_WORD(reg
->enc
, i
);
2656 is_word
= ONIGENC_IS_CODE_WORD(reg
->enc
, i
);
2658 if (!IS_NCCLASS_NOT(xc
)) {
2659 if (BITSET_AT(xc
->bs
, i
))
2663 if (! BITSET_AT(xc
->bs
, i
))
2680 CClassNode
* yc
= NCCLASS(y
);
2682 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2683 v
= BITSET_AT(xc
->bs
, i
);
2684 if ((v
!= 0 && !IS_NCCLASS_NOT(xc
)) ||
2685 (v
== 0 && IS_NCCLASS_NOT(xc
))) {
2686 v
= BITSET_AT(yc
->bs
, i
);
2687 if ((v
!= 0 && !IS_NCCLASS_NOT(yc
)) ||
2688 (v
== 0 && IS_NCCLASS_NOT(yc
)))
2692 if ((IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) ||
2693 (IS_NULL(yc
->mbuf
) && !IS_NCCLASS_NOT(yc
)))
2711 StrNode
* xs
= NSTR(x
);
2712 if (NSTRING_LEN(x
) == 0)
2717 switch (NCTYPE(y
)->ctype
) {
2718 case ONIGENC_CTYPE_WORD
:
2719 if (NCTYPE(y
)->ascii_range
) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg
->enc
, xs
->s
, xs
->end
))
2721 return NCTYPE(y
)->not;
2723 return !(NCTYPE(y
)->not);
2726 if (ONIGENC_IS_MBC_WORD(reg
->enc
, xs
->s
, xs
->end
))
2727 return NCTYPE(y
)->not;
2729 return !(NCTYPE(y
)->not);
2739 CClassNode
* cc
= NCCLASS(y
);
2741 code
= ONIGENC_MBC_TO_CODE(reg
->enc
, xs
->s
,
2742 xs
->s
+ ONIGENC_MBC_MAXLEN(reg
->enc
));
2743 return (onig_is_code_in_cc(reg
->enc
, code
, cc
) != 0 ? 0 : 1);
2750 StrNode
* ys
= NSTR(y
);
2751 len
= NSTRING_LEN(x
);
2752 if (len
> NSTRING_LEN(y
)) len
= NSTRING_LEN(y
);
2753 if (NSTRING_IS_AMBIG(x
) || NSTRING_IS_AMBIG(y
)) {
2758 for (i
= 0, p
= ys
->s
, q
= xs
->s
; (OnigDistance
)i
< len
; i
++, p
++, q
++) {
2759 if (*p
!= *q
) return 1;
2779 get_head_value_node(Node
* node
, int exact
, regex_t
* reg
)
2781 Node
* n
= NULL_NODE
;
2783 switch (NTYPE(node
)) {
2787 #ifdef USE_SUBEXP_CALL
2800 n
= get_head_value_node(NCAR(node
), exact
, reg
);
2805 StrNode
* sn
= NSTR(node
);
2807 if (sn
->end
<= sn
->s
)
2811 !NSTRING_IS_RAW(node
) && IS_IGNORECASE(reg
->options
)) {
2821 QtfrNode
* qn
= NQTFR(node
);
2822 if (qn
->lower
> 0) {
2823 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2824 if (IS_NOT_NULL(qn
->head_exact
))
2828 n
= get_head_value_node(qn
->target
, exact
, reg
);
2835 EncloseNode
* en
= NENCLOSE(node
);
2837 case ENCLOSE_OPTION
:
2839 OnigOptionType options
= reg
->options
;
2841 reg
->options
= NENCLOSE(node
)->option
;
2842 n
= get_head_value_node(NENCLOSE(node
)->target
, exact
, reg
);
2843 reg
->options
= options
;
2847 case ENCLOSE_MEMORY
:
2848 case ENCLOSE_STOP_BACKTRACK
:
2849 case ENCLOSE_CONDITION
:
2850 n
= get_head_value_node(en
->target
, exact
, reg
);
2853 case ENCLOSE_ABSENT
:
2860 if (NANCHOR(node
)->type
== ANCHOR_PREC_READ
)
2861 n
= get_head_value_node(NANCHOR(node
)->target
, exact
, reg
);
2872 check_type_tree(Node
* node
, int type_mask
, int enclose_mask
, int anchor_mask
)
2877 if ((NTYPE2BIT(type
) & type_mask
) == 0)
2884 r
= check_type_tree(NCAR(node
), type_mask
, enclose_mask
,
2886 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2890 r
= check_type_tree(NQTFR(node
)->target
, type_mask
, enclose_mask
,
2896 EncloseNode
* en
= NENCLOSE(node
);
2897 if ((en
->type
& enclose_mask
) == 0)
2900 r
= check_type_tree(en
->target
, type_mask
, enclose_mask
, anchor_mask
);
2905 type
= NANCHOR(node
)->type
;
2906 if ((type
& anchor_mask
) == 0)
2909 if (NANCHOR(node
)->target
)
2910 r
= check_type_tree(NANCHOR(node
)->target
,
2911 type_mask
, enclose_mask
, anchor_mask
);
2920 #ifdef USE_SUBEXP_CALL
2922 # define RECURSION_EXIST 1
2923 # define RECURSION_INFINITE 2
2926 subexp_inf_recursive_check(Node
* node
, ScanEnv
* env
, int head
)
2941 ret
= subexp_inf_recursive_check(NCAR(x
), env
, head
);
2942 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2945 ret
= get_min_match_length(NCAR(x
), &min
, env
);
2946 if (ret
!= 0) return ret
;
2947 if (min
!= 0) head
= 0;
2949 } while (IS_NOT_NULL(x
= NCDR(x
)));
2956 r
= RECURSION_EXIST
;
2958 ret
= subexp_inf_recursive_check(NCAR(node
), env
, head
);
2959 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2961 } while (IS_NOT_NULL(node
= NCDR(node
)));
2966 r
= subexp_inf_recursive_check(NQTFR(node
)->target
, env
, head
);
2967 if (r
== RECURSION_EXIST
) {
2968 if (NQTFR(node
)->lower
== 0) r
= 0;
2974 AnchorNode
* an
= NANCHOR(node
);
2976 case ANCHOR_PREC_READ
:
2977 case ANCHOR_PREC_READ_NOT
:
2978 case ANCHOR_LOOK_BEHIND
:
2979 case ANCHOR_LOOK_BEHIND_NOT
:
2980 r
= subexp_inf_recursive_check(an
->target
, env
, head
);
2987 r
= subexp_inf_recursive_check(NCALL(node
)->target
, env
, head
);
2991 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2993 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2994 return (head
== 0 ? RECURSION_EXIST
: RECURSION_INFINITE
);
2996 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2997 r
= subexp_inf_recursive_check(NENCLOSE(node
)->target
, env
, head
);
2998 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
3010 subexp_inf_recursive_check_trav(Node
* node
, ScanEnv
* env
)
3020 r
= subexp_inf_recursive_check_trav(NCAR(node
), env
);
3021 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3025 r
= subexp_inf_recursive_check_trav(NQTFR(node
)->target
, env
);
3030 AnchorNode
* an
= NANCHOR(node
);
3032 case ANCHOR_PREC_READ
:
3033 case ANCHOR_PREC_READ_NOT
:
3034 case ANCHOR_LOOK_BEHIND
:
3035 case ANCHOR_LOOK_BEHIND_NOT
:
3036 r
= subexp_inf_recursive_check_trav(an
->target
, env
);
3044 EncloseNode
* en
= NENCLOSE(node
);
3046 if (IS_ENCLOSE_RECURSION(en
)) {
3047 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
3048 r
= subexp_inf_recursive_check(en
->target
, env
, 1);
3049 if (r
> 0) return ONIGERR_NEVER_ENDING_RECURSION
;
3050 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
3052 r
= subexp_inf_recursive_check_trav(en
->target
, env
);
3065 subexp_recursive_check(Node
* node
)
3069 switch (NTYPE(node
)) {
3073 r
|= subexp_recursive_check(NCAR(node
));
3074 } while (IS_NOT_NULL(node
= NCDR(node
)));
3078 r
= subexp_recursive_check(NQTFR(node
)->target
);
3083 AnchorNode
* an
= NANCHOR(node
);
3085 case ANCHOR_PREC_READ
:
3086 case ANCHOR_PREC_READ_NOT
:
3087 case ANCHOR_LOOK_BEHIND
:
3088 case ANCHOR_LOOK_BEHIND_NOT
:
3089 r
= subexp_recursive_check(an
->target
);
3096 r
= subexp_recursive_check(NCALL(node
)->target
);
3097 if (r
!= 0) SET_CALL_RECURSION(node
);
3101 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
3103 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
3104 return 1; /* recursion */
3106 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
3107 r
= subexp_recursive_check(NENCLOSE(node
)->target
);
3108 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
3121 subexp_recursive_check_trav(Node
* node
, ScanEnv
* env
)
3123 # define FOUND_CALLED_NODE 1
3135 ret
= subexp_recursive_check_trav(NCAR(node
), env
);
3136 if (ret
== FOUND_CALLED_NODE
) r
= FOUND_CALLED_NODE
;
3137 else if (ret
< 0) return ret
;
3138 } while (IS_NOT_NULL(node
= NCDR(node
)));
3143 r
= subexp_recursive_check_trav(NQTFR(node
)->target
, env
);
3144 if (NQTFR(node
)->upper
== 0) {
3145 if (r
== FOUND_CALLED_NODE
)
3146 NQTFR(node
)->is_referred
= 1;
3152 AnchorNode
* an
= NANCHOR(node
);
3154 case ANCHOR_PREC_READ
:
3155 case ANCHOR_PREC_READ_NOT
:
3156 case ANCHOR_LOOK_BEHIND
:
3157 case ANCHOR_LOOK_BEHIND_NOT
:
3158 r
= subexp_recursive_check_trav(an
->target
, env
);
3166 EncloseNode
* en
= NENCLOSE(node
);
3168 if (! IS_ENCLOSE_RECURSION(en
)) {
3169 if (IS_ENCLOSE_CALLED(en
)) {
3170 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
3171 r
= subexp_recursive_check(en
->target
);
3172 if (r
!= 0) SET_ENCLOSE_STATUS(node
, NST_RECURSION
);
3173 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
3176 r
= subexp_recursive_check_trav(en
->target
, env
);
3177 if (IS_ENCLOSE_CALLED(en
))
3178 r
|= FOUND_CALLED_NODE
;
3190 setup_subexp_call(Node
* node
, ScanEnv
* env
)
3199 r
= setup_subexp_call(NCAR(node
), env
);
3200 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3205 r
= setup_subexp_call(NCAR(node
), env
);
3206 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3210 r
= setup_subexp_call(NQTFR(node
)->target
, env
);
3213 r
= setup_subexp_call(NENCLOSE(node
)->target
, env
);
3218 CallNode
* cn
= NCALL(node
);
3219 Node
** nodes
= SCANENV_MEM_NODES(env
);
3221 if (cn
->group_num
!= 0) {
3222 int gnum
= cn
->group_num
;
3224 # ifdef USE_NAMED_GROUP
3225 if (env
->num_named
> 0 &&
3226 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
3227 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
3228 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
3231 if (gnum
> env
->num_mem
) {
3232 onig_scan_env_set_error_string(env
,
3233 ONIGERR_UNDEFINED_GROUP_REFERENCE
, cn
->name
, cn
->name_end
);
3234 return ONIGERR_UNDEFINED_GROUP_REFERENCE
;
3237 # ifdef USE_NAMED_GROUP
3240 cn
->target
= nodes
[cn
->group_num
];
3241 if (IS_NULL(cn
->target
)) {
3242 onig_scan_env_set_error_string(env
,
3243 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3244 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3246 SET_ENCLOSE_STATUS(cn
->target
, NST_CALLED
);
3247 BIT_STATUS_ON_AT(env
->bt_mem_start
, cn
->group_num
);
3248 cn
->unset_addr_list
= env
->unset_addr_list
;
3250 # ifdef USE_NAMED_GROUP
3251 # ifdef USE_PERL_SUBEXP_CALL
3252 else if (cn
->name
== cn
->name_end
) {
3259 int n
= onig_name_to_group_numbers(env
->reg
, cn
->name
, cn
->name_end
,
3262 onig_scan_env_set_error_string(env
,
3263 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3264 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3267 ! IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
)) {
3268 onig_scan_env_set_error_string(env
,
3269 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, cn
->name
, cn
->name_end
);
3270 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
;
3273 cn
->group_num
= refs
[0];
3283 AnchorNode
* an
= NANCHOR(node
);
3286 case ANCHOR_PREC_READ
:
3287 case ANCHOR_PREC_READ_NOT
:
3288 case ANCHOR_LOOK_BEHIND
:
3289 case ANCHOR_LOOK_BEHIND_NOT
:
3290 r
= setup_subexp_call(an
->target
, env
);
3304 /* divide different length alternatives in look-behind.
3305 (?<=A|B) ==> (?<=A)|(?<=B)
3306 (?<!A|B) ==> (?<!A)(?<!B)
3309 divide_look_behind_alternatives(Node
* node
)
3311 Node
*head
, *np
, *insert_node
;
3312 AnchorNode
* an
= NANCHOR(node
);
3313 int anc_type
= an
->type
;
3317 swap_node(node
, head
);
3319 NANCHOR(head
)->target
= np
;
3322 while ((np
= NCDR(np
)) != NULL_NODE
) {
3323 insert_node
= onig_node_new_anchor(anc_type
);
3324 CHECK_NULL_RETURN_MEMERR(insert_node
);
3325 NANCHOR(insert_node
)->target
= NCAR(np
);
3326 NCAR(np
) = insert_node
;
3329 if (anc_type
== ANCHOR_LOOK_BEHIND_NOT
) {
3332 SET_NTYPE(np
, NT_LIST
); /* alt -> list */
3333 } while ((np
= NCDR(np
)) != NULL_NODE
);
3339 setup_look_behind(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
3342 AnchorNode
* an
= NANCHOR(node
);
3344 r
= get_char_length_tree(an
->target
, reg
, &len
);
3347 else if (r
== GET_CHAR_LEN_VARLEN
)
3348 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3349 else if (r
== GET_CHAR_LEN_TOP_ALT_VARLEN
) {
3350 if (IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
))
3351 r
= divide_look_behind_alternatives(node
);
3353 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3360 next_setup(Node
* node
, Node
* next_node
, regex_t
* reg
)
3366 if (type
== NT_QTFR
) {
3367 QtfrNode
* qn
= NQTFR(node
);
3368 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
)) {
3369 #ifdef USE_QTFR_PEEK_NEXT
3370 Node
* n
= get_head_value_node(next_node
, 1, reg
);
3371 /* '\0': for UTF-16BE etc... */
3372 if (IS_NOT_NULL(n
) && NSTR(n
)->s
[0] != '\0') {
3373 qn
->next_head_exact
= n
;
3376 /* automatic possessification a*b ==> (?>a*)b */
3377 if (qn
->lower
<= 1) {
3378 int ttype
= NTYPE(qn
->target
);
3379 if (IS_NODE_TYPE_SIMPLE(ttype
)) {
3381 x
= get_head_value_node(qn
->target
, 0, reg
);
3382 if (IS_NOT_NULL(x
)) {
3383 y
= get_head_value_node(next_node
, 0, reg
);
3384 if (IS_NOT_NULL(y
) && is_not_included(x
, y
, reg
)) {
3385 Node
* en
= onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK
);
3386 CHECK_NULL_RETURN_MEMERR(en
);
3387 SET_ENCLOSE_STATUS(en
, NST_STOP_BT_SIMPLE_REPEAT
);
3388 swap_node(node
, en
);
3389 NENCLOSE(node
)->target
= en
;
3396 else if (type
== NT_ENCLOSE
) {
3397 EncloseNode
* en
= NENCLOSE(node
);
3398 if (en
->type
== ENCLOSE_MEMORY
&& !IS_ENCLOSE_CALLED(en
)) {
3408 update_string_node_case_fold(regex_t
* reg
, Node
*node
)
3410 UChar
*p
, *end
, buf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
3411 UChar
*sbuf
, *ebuf
, *sp
;
3413 OnigDistance sbuf_size
;
3414 StrNode
* sn
= NSTR(node
);
3417 sbuf_size
= (end
- sn
->s
) * 2;
3418 sbuf
= (UChar
* )xmalloc(sbuf_size
);
3419 CHECK_NULL_RETURN_MEMERR(sbuf
);
3420 ebuf
= sbuf
+ sbuf_size
;
3425 len
= ONIGENC_MBC_CASE_FOLD(reg
->enc
, reg
->case_fold_flag
, &p
, end
, buf
);
3426 for (i
= 0; i
< len
; i
++) {
3428 UChar
* p
= (UChar
* )xrealloc(sbuf
, sbuf_size
* 2);
3431 return ONIGERR_MEMORY
;
3434 sp
= sbuf
+ sbuf_size
;
3436 ebuf
= sbuf
+ sbuf_size
;
3443 r
= onig_node_str_set(node
, sbuf
, sp
);
3450 expand_case_fold_make_rem_string(Node
** rnode
, UChar
*s
, UChar
*end
,
3456 node
= onig_node_new_str(s
, end
);
3457 if (IS_NULL(node
)) return ONIGERR_MEMORY
;
3459 r
= update_string_node_case_fold(reg
, node
);
3461 onig_node_free(node
);
3465 NSTRING_SET_AMBIG(node
);
3466 NSTRING_SET_DONT_GET_OPT_INFO(node
);
3472 is_case_fold_variable_len(int item_num
, OnigCaseFoldCodeItem items
[],
3477 for (i
= 0; i
< item_num
; i
++) {
3478 if (items
[i
].byte_len
!= slen
) {
3481 if (items
[i
].code_len
!= 1) {
3489 expand_case_fold_string_alt(int item_num
, OnigCaseFoldCodeItem items
[],
3490 UChar
*p
, int slen
, UChar
*end
,
3491 regex_t
* reg
, Node
**rnode
)
3493 int r
, i
, j
, len
, varlen
;
3494 Node
*anode
, *var_anode
, *snode
, *xnode
, *an
;
3495 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
3497 *rnode
= var_anode
= NULL_NODE
;
3500 for (i
= 0; i
< item_num
; i
++) {
3501 if (items
[i
].byte_len
!= slen
) {
3508 *rnode
= var_anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3509 if (IS_NULL(var_anode
)) return ONIGERR_MEMORY
;
3511 xnode
= onig_node_new_list(NULL
, NULL
);
3512 if (IS_NULL(xnode
)) goto mem_err
;
3513 NCAR(var_anode
) = xnode
;
3515 anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3516 if (IS_NULL(anode
)) goto mem_err
;
3517 NCAR(xnode
) = anode
;
3520 *rnode
= anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3521 if (IS_NULL(anode
)) return ONIGERR_MEMORY
;
3524 snode
= onig_node_new_str(p
, p
+ slen
);
3525 if (IS_NULL(snode
)) goto mem_err
;
3527 NCAR(anode
) = snode
;
3529 for (i
= 0; i
< item_num
; i
++) {
3530 snode
= onig_node_new_str(NULL
, NULL
);
3531 if (IS_NULL(snode
)) goto mem_err
;
3533 for (j
= 0; j
< items
[i
].code_len
; j
++) {
3534 len
= ONIGENC_CODE_TO_MBC(reg
->enc
, items
[i
].code
[j
], buf
);
3540 r
= onig_node_str_cat(snode
, buf
, buf
+ len
);
3541 if (r
!= 0) goto mem_err2
;
3544 an
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3549 if (items
[i
].byte_len
!= slen
) {
3551 UChar
*q
= p
+ items
[i
].byte_len
;
3554 r
= expand_case_fold_make_rem_string(&rem
, q
, end
, reg
);
3560 xnode
= onig_node_list_add(NULL_NODE
, snode
);
3561 if (IS_NULL(xnode
)) {
3563 onig_node_free(rem
);
3566 if (IS_NULL(onig_node_list_add(xnode
, rem
))) {
3568 onig_node_free(xnode
);
3569 onig_node_free(rem
);
3579 NCDR(var_anode
) = an
;
3592 onig_node_free(snode
);
3595 onig_node_free(*rnode
);
3597 return ONIGERR_MEMORY
;
3601 expand_case_fold_string(Node
* node
, regex_t
* reg
)
3603 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3605 int r
, n
, len
, alt_num
;
3607 UChar
*start
, *end
, *p
;
3608 Node
*top_root
, *root
, *snode
, *prev_node
;
3609 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
3610 StrNode
* sn
= NSTR(node
);
3612 if (NSTRING_IS_AMBIG(node
)) return 0;
3616 if (start
>= end
) return 0;
3619 top_root
= root
= prev_node
= snode
= NULL_NODE
;
3623 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg
->enc
, reg
->case_fold_flag
,
3630 len
= enclen(reg
->enc
, p
, end
);
3632 varlen
= is_case_fold_variable_len(n
, items
, len
);
3633 if (n
== 0 || varlen
== 0) {
3634 if (IS_NULL(snode
)) {
3635 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3636 onig_node_free(top_root
);
3637 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3638 if (IS_NULL(root
)) {
3639 onig_node_free(prev_node
);
3644 prev_node
= snode
= onig_node_new_str(NULL
, NULL
);
3645 if (IS_NULL(snode
)) goto mem_err
;
3646 if (IS_NOT_NULL(root
)) {
3647 if (IS_NULL(onig_node_list_add(root
, snode
))) {
3648 onig_node_free(snode
);
3654 r
= onig_node_str_cat(snode
, p
, p
+ len
);
3655 if (r
!= 0) goto err
;
3659 if (alt_num
> THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
) break;
3661 if (IS_NOT_NULL(snode
)) {
3662 r
= update_string_node_case_fold(reg
, snode
);
3664 NSTRING_SET_AMBIG(snode
);
3667 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3668 onig_node_free(top_root
);
3669 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3670 if (IS_NULL(root
)) {
3671 onig_node_free(prev_node
);
3676 r
= expand_case_fold_string_alt(n
, items
, p
, len
, end
, reg
, &prev_node
);
3677 if (r
< 0) goto mem_err
;
3679 if (IS_NULL(root
)) {
3680 top_root
= prev_node
;
3683 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3684 onig_node_free(prev_node
);
3689 root
= NCAR(prev_node
);
3692 if (IS_NOT_NULL(root
)) {
3693 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3694 onig_node_free(prev_node
);
3705 if (IS_NOT_NULL(snode
)) {
3706 r
= update_string_node_case_fold(reg
, snode
);
3708 NSTRING_SET_AMBIG(snode
);
3715 r
= expand_case_fold_make_rem_string(&srem
, p
, end
, reg
);
3716 if (r
!= 0) goto mem_err
;
3718 if (IS_NOT_NULL(prev_node
) && IS_NULL(root
)) {
3719 onig_node_free(top_root
);
3720 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3721 if (IS_NULL(root
)) {
3722 onig_node_free(srem
);
3723 onig_node_free(prev_node
);
3728 if (IS_NULL(root
)) {
3732 if (IS_NULL(onig_node_list_add(root
, srem
))) {
3733 onig_node_free(srem
);
3740 top_root
= (IS_NOT_NULL(top_root
) ? top_root
: prev_node
);
3741 swap_node(node
, top_root
);
3742 onig_node_free(top_root
);
3749 onig_node_free(top_root
);
3754 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3756 # define CEC_THRES_NUM_BIG_REPEAT 512
3757 # define CEC_INFINITE_NUM 0x7fffffff
3759 # define CEC_IN_INFINITE_REPEAT (1<<0)
3760 # define CEC_IN_FINITE_REPEAT (1<<1)
3761 # define CEC_CONT_BIG_REPEAT (1<<2)
3764 setup_comb_exp_check(Node
* node
, int state
, ScanEnv
* env
)
3774 r
= setup_comb_exp_check(NCAR(node
), r
, env
);
3775 } while (r
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3783 ret
= setup_comb_exp_check(NCAR(node
), state
, env
);
3785 } while (ret
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3791 int child_state
= state
;
3793 QtfrNode
* qn
= NQTFR(node
);
3794 Node
* target
= qn
->target
;
3797 if (! IS_REPEAT_INFINITE(qn
->upper
)) {
3798 if (qn
->upper
> 1) {
3799 /* {0,1}, {1,1} are allowed */
3800 child_state
|= CEC_IN_FINITE_REPEAT
;
3802 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3803 if (env
->backrefed_mem
== 0) {
3804 if (NTYPE(qn
->target
) == NT_ENCLOSE
) {
3805 EncloseNode
* en
= NENCLOSE(qn
->target
);
3806 if (en
->type
== ENCLOSE_MEMORY
) {
3807 if (NTYPE(en
->target
) == NT_QTFR
) {
3808 QtfrNode
* q
= NQTFR(en
->target
);
3809 if (IS_REPEAT_INFINITE(q
->upper
)
3810 && q
->greedy
== qn
->greedy
) {
3811 qn
->upper
= (qn
->lower
== 0 ? 1 : qn
->lower
);
3813 child_state
= state
;
3822 if (state
& CEC_IN_FINITE_REPEAT
) {
3823 qn
->comb_exp_check_num
= -1;
3826 if (IS_REPEAT_INFINITE(qn
->upper
)) {
3827 var_num
= CEC_INFINITE_NUM
;
3828 child_state
|= CEC_IN_INFINITE_REPEAT
;
3831 var_num
= qn
->upper
- qn
->lower
;
3834 if (var_num
>= CEC_THRES_NUM_BIG_REPEAT
)
3835 add_state
|= CEC_CONT_BIG_REPEAT
;
3837 if (((state
& CEC_IN_INFINITE_REPEAT
) != 0 && var_num
!= 0) ||
3838 ((state
& CEC_CONT_BIG_REPEAT
) != 0 &&
3839 var_num
>= CEC_THRES_NUM_BIG_REPEAT
)) {
3840 if (qn
->comb_exp_check_num
== 0) {
3841 env
->num_comb_exp_check
++;
3842 qn
->comb_exp_check_num
= env
->num_comb_exp_check
;
3843 if (env
->curr_max_regnum
> env
->comb_exp_max_regnum
)
3844 env
->comb_exp_max_regnum
= env
->curr_max_regnum
;
3849 r
= setup_comb_exp_check(target
, child_state
, env
);
3856 EncloseNode
* en
= NENCLOSE(node
);
3859 case ENCLOSE_MEMORY
:
3861 if (env
->curr_max_regnum
< en
->regnum
)
3862 env
->curr_max_regnum
= en
->regnum
;
3864 r
= setup_comb_exp_check(en
->target
, state
, env
);
3869 r
= setup_comb_exp_check(en
->target
, state
, env
);
3875 # ifdef USE_SUBEXP_CALL
3877 if (IS_CALL_RECURSION(NCALL(node
)))
3878 env
->has_recursion
= 1;
3880 r
= setup_comb_exp_check(NCALL(node
)->target
, state
, env
);
3892 #define IN_ALT (1<<0)
3893 #define IN_NOT (1<<1)
3894 #define IN_REPEAT (1<<2)
3895 #define IN_VAR_REPEAT (1<<3)
3896 #define IN_CALL (1<<4)
3897 #define IN_RECCALL (1<<5)
3899 /* setup_tree does the following work.
3900 1. check empty loop. (set qn->target_empty_info)
3901 2. expand ignore-case in char class.
3902 3. set memory status bit flags. (reg->mem_stats)
3903 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3904 5. find invalid patterns in look-behind.
3905 6. expand repeated string.
3908 setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
3918 Node
* prev
= NULL_NODE
;
3920 r
= setup_tree(NCAR(node
), reg
, state
, env
);
3921 if (IS_NOT_NULL(prev
) && r
== 0) {
3922 r
= next_setup(prev
, NCAR(node
), reg
);
3925 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3931 r
= setup_tree(NCAR(node
), reg
, (state
| IN_ALT
), env
);
3932 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3939 if (IS_IGNORECASE(reg
->options
) && !NSTRING_IS_RAW(node
)) {
3940 r
= expand_case_fold_string(node
, reg
);
3948 #ifdef USE_SUBEXP_CALL
3957 Node
** nodes
= SCANENV_MEM_NODES(env
);
3958 BRefNode
* br
= NBREF(node
);
3960 for (i
= 0; i
< br
->back_num
; i
++) {
3961 if (p
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
3962 BIT_STATUS_ON_AT(env
->backrefed_mem
, p
[i
]);
3963 BIT_STATUS_ON_AT(env
->bt_mem_start
, p
[i
]);
3964 #ifdef USE_BACKREF_WITH_LEVEL
3965 if (IS_BACKREF_NEST_LEVEL(br
)) {
3966 BIT_STATUS_ON_AT(env
->bt_mem_end
, p
[i
]);
3969 SET_ENCLOSE_STATUS(nodes
[p
[i
]], NST_MEM_BACKREFED
);
3977 QtfrNode
* qn
= NQTFR(node
);
3978 Node
* target
= qn
->target
;
3980 if ((state
& IN_REPEAT
) != 0) {
3981 qn
->state
|= NST_IN_REPEAT
;
3984 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 1) {
3985 r
= get_min_match_length(target
, &d
, env
);
3988 qn
->target_empty_info
= NQ_TARGET_IS_EMPTY
;
3989 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3990 r
= quantifiers_memory_node_info(target
);
3993 qn
->target_empty_info
= r
;
3997 r
= get_max_match_length(target
, &d
, env
);
3998 if (r
== 0 && d
== 0) {
3999 /* ()* ==> ()?, ()+ ==> () */
4001 if (qn
->lower
> 1) qn
->lower
= 1;
4002 if (NTYPE(target
) == NT_STR
) {
4003 qn
->upper
= qn
->lower
= 0; /* /(?:)+/ ==> // */
4011 if (qn
->lower
!= qn
->upper
)
4012 state
|= IN_VAR_REPEAT
;
4013 r
= setup_tree(target
, reg
, state
, env
);
4017 #define EXPAND_STRING_MAX_LENGTH 100
4018 if (NTYPE(target
) == NT_STR
) {
4019 if (qn
->lower
> 1) {
4020 int i
, n
= qn
->lower
;
4021 OnigDistance len
= NSTRING_LEN(target
);
4022 StrNode
* sn
= NSTR(target
);
4025 np
= onig_node_new_str(sn
->s
, sn
->end
);
4026 if (IS_NULL(np
)) return ONIGERR_MEMORY
;
4027 NSTR(np
)->flag
= sn
->flag
;
4029 for (i
= 1; i
< n
&& (i
+1) * len
<= EXPAND_STRING_MAX_LENGTH
; i
++) {
4030 r
= onig_node_str_cat(np
, sn
->s
, sn
->end
);
4036 if (i
< qn
->upper
|| IS_REPEAT_INFINITE(qn
->upper
)) {
4040 if (! IS_REPEAT_INFINITE(qn
->upper
))
4043 np1
= onig_node_new_list(np
, NULL
);
4046 return ONIGERR_MEMORY
;
4048 swap_node(np1
, node
);
4049 np2
= onig_node_list_add(node
, np1
);
4051 onig_node_free(np1
);
4052 return ONIGERR_MEMORY
;
4056 swap_node(np
, node
);
4059 break; /* break case NT_QTFR: */
4063 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4064 if (qn
->greedy
&& (qn
->target_empty_info
!= 0)) {
4065 if (NTYPE(target
) == NT_QTFR
) {
4066 QtfrNode
* tqn
= NQTFR(target
);
4067 if (IS_NOT_NULL(tqn
->head_exact
)) {
4068 qn
->head_exact
= tqn
->head_exact
;
4069 tqn
->head_exact
= NULL
;
4073 qn
->head_exact
= get_head_value_node(qn
->target
, 1, reg
);
4082 EncloseNode
* en
= NENCLOSE(node
);
4085 case ENCLOSE_OPTION
:
4087 OnigOptionType options
= reg
->options
;
4088 reg
->options
= NENCLOSE(node
)->option
;
4089 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
4090 reg
->options
= options
;
4094 case ENCLOSE_MEMORY
:
4095 if ((state
& (IN_ALT
| IN_NOT
| IN_VAR_REPEAT
| IN_CALL
)) != 0) {
4096 BIT_STATUS_ON_AT(env
->bt_mem_start
, en
->regnum
);
4097 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4099 if (IS_ENCLOSE_CALLED(en
))
4101 if (IS_ENCLOSE_RECURSION(en
))
4102 state
|= IN_RECCALL
;
4103 else if ((state
& IN_RECCALL
) != 0)
4104 SET_CALL_RECURSION(node
);
4105 r
= setup_tree(en
->target
, reg
, state
, env
);
4108 case ENCLOSE_STOP_BACKTRACK
:
4110 Node
* target
= en
->target
;
4111 r
= setup_tree(target
, reg
, state
, env
);
4112 if (NTYPE(target
) == NT_QTFR
) {
4113 QtfrNode
* tqn
= NQTFR(target
);
4114 if (IS_REPEAT_INFINITE(tqn
->upper
) && tqn
->lower
<= 1 &&
4115 tqn
->greedy
!= 0) { /* (?>a*), a*+ etc... */
4116 int qtype
= NTYPE(tqn
->target
);
4117 if (IS_NODE_TYPE_SIMPLE(qtype
))
4118 SET_ENCLOSE_STATUS(node
, NST_STOP_BT_SIMPLE_REPEAT
);
4124 case ENCLOSE_CONDITION
:
4125 #ifdef USE_NAMED_GROUP
4126 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node
)) &&
4127 env
->num_named
> 0 &&
4128 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
4129 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
4130 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
4133 if (NENCLOSE(node
)->regnum
> env
->num_mem
)
4134 return ONIGERR_INVALID_BACKREF
;
4135 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
4138 case ENCLOSE_ABSENT
:
4139 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
4147 AnchorNode
* an
= NANCHOR(node
);
4150 case ANCHOR_PREC_READ
:
4151 r
= setup_tree(an
->target
, reg
, state
, env
);
4153 case ANCHOR_PREC_READ_NOT
:
4154 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
4157 /* allowed node types in look-behind */
4158 #define ALLOWED_TYPE_IN_LB \
4159 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4160 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4162 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4163 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4165 #define ALLOWED_ANCHOR_IN_LB \
4166 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4167 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4168 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4169 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4170 #define ALLOWED_ANCHOR_IN_LB_NOT \
4171 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4172 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4173 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4174 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4176 case ANCHOR_LOOK_BEHIND
:
4178 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
4179 ALLOWED_ENCLOSE_IN_LB
, ALLOWED_ANCHOR_IN_LB
);
4180 if (r
< 0) return r
;
4181 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
4182 if (NTYPE(node
) != NT_ANCHOR
) goto restart
;
4183 r
= setup_tree(an
->target
, reg
, state
, env
);
4184 if (r
!= 0) return r
;
4185 r
= setup_look_behind(node
, reg
, env
);
4189 case ANCHOR_LOOK_BEHIND_NOT
:
4191 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
4192 ALLOWED_ENCLOSE_IN_LB_NOT
, ALLOWED_ANCHOR_IN_LB_NOT
);
4193 if (r
< 0) return r
;
4194 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
4195 if (NTYPE(node
) != NT_ANCHOR
) goto restart
;
4196 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
4197 if (r
!= 0) return r
;
4198 r
= setup_look_behind(node
, reg
, env
);
4212 #ifndef USE_SUNDAY_QUICK_SEARCH
4213 /* set skip map for Boyer-Moore search */
4215 set_bm_skip(UChar
* s
, UChar
* end
, regex_t
* reg
,
4216 UChar skip
[], int** int_skip
, int ignore_case
)
4218 OnigDistance i
, len
;
4219 int clen
, flen
, n
, j
, k
;
4220 UChar
*p
, buf
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
][ONIGENC_MBC_CASE_FOLD_MAXLEN
];
4221 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4222 OnigEncoding enc
= reg
->enc
;
4225 if (len
< ONIG_CHAR_TABLE_SIZE
) {
4226 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = (UChar
)len
;
4229 for (i
= 0; i
< len
- 1; i
+= clen
) {
4232 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4234 clen
= enclen(enc
, p
, end
);
4236 clen
= (int )(end
- p
);
4238 for (j
= 0; j
< n
; j
++) {
4239 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4240 return 1; /* different length isn't supported. */
4241 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4243 return 1; /* different length isn't supported. */
4245 for (j
= 0; j
< clen
; j
++) {
4246 skip
[s
[i
+ j
]] = (UChar
)(len
- 1 - i
- j
);
4247 for (k
= 0; k
< n
; k
++) {
4248 skip
[buf
[k
][j
]] = (UChar
)(len
- 1 - i
- j
);
4254 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4255 /* This should not happen. */
4256 return ONIGERR_TYPE_BUG
;
4258 if (IS_NULL(*int_skip
)) {
4259 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
4260 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
4262 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = (int )len
;
4265 for (i
= 0; i
< len
- 1; i
+= clen
) {
4268 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4270 clen
= enclen(enc
, p
, end
);
4272 clen
= (int )(end
- p
);
4274 for (j
= 0; j
< n
; j
++) {
4275 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4276 return 1; /* different length isn't supported. */
4277 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4279 return 1; /* different length isn't supported. */
4281 for (j
= 0; j
< clen
; j
++) {
4282 (*int_skip
)[s
[i
+ j
]] = (int )(len
- 1 - i
- j
);
4283 for (k
= 0; k
< n
; k
++) {
4284 (*int_skip
)[buf
[k
][j
]] = (int )(len
- 1 - i
- j
);
4293 #else /* USE_SUNDAY_QUICK_SEARCH */
4295 /* set skip map for Sunday's quick search */
4297 set_bm_skip(UChar
* s
, UChar
* end
, regex_t
* reg
,
4298 UChar skip
[], int** int_skip
, int ignore_case
)
4300 OnigDistance i
, len
;
4301 int clen
, flen
, n
, j
, k
;
4302 UChar
*p
, buf
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
][ONIGENC_MBC_CASE_FOLD_MAXLEN
];
4303 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4304 OnigEncoding enc
= reg
->enc
;
4307 if (len
< ONIG_CHAR_TABLE_SIZE
) {
4308 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = (UChar
)(len
+ 1);
4311 for (i
= 0; i
< len
; i
+= clen
) {
4314 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4316 clen
= enclen(enc
, p
, end
);
4318 clen
= (int )(end
- p
);
4320 for (j
= 0; j
< n
; j
++) {
4321 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4322 return 1; /* different length isn't supported. */
4323 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4325 return 1; /* different length isn't supported. */
4327 for (j
= 0; j
< clen
; j
++) {
4328 skip
[s
[i
+ j
]] = (UChar
)(len
- i
- j
);
4329 for (k
= 0; k
< n
; k
++) {
4330 skip
[buf
[k
][j
]] = (UChar
)(len
- i
- j
);
4336 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4337 /* This should not happen. */
4338 return ONIGERR_TYPE_BUG
;
4340 if (IS_NULL(*int_skip
)) {
4341 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
4342 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
4344 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = (int )(len
+ 1);
4347 for (i
= 0; i
< len
; i
+= clen
) {
4350 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, reg
->case_fold_flag
,
4352 clen
= enclen(enc
, p
, end
);
4354 clen
= (int )(end
- p
);
4356 for (j
= 0; j
< n
; j
++) {
4357 if ((items
[j
].code_len
!= 1) || (items
[j
].byte_len
!= clen
))
4358 return 1; /* different length isn't supported. */
4359 flen
= ONIGENC_CODE_TO_MBC(enc
, items
[j
].code
[0], buf
[j
]);
4361 return 1; /* different length isn't supported. */
4363 for (j
= 0; j
< clen
; j
++) {
4364 (*int_skip
)[s
[i
+ j
]] = (int )(len
- i
- j
);
4365 for (k
= 0; k
< n
; k
++) {
4366 (*int_skip
)[buf
[k
][j
]] = (int )(len
- i
- j
);
4374 #endif /* USE_SUNDAY_QUICK_SEARCH */
4377 OnigDistance min
; /* min byte length */
4378 OnigDistance max
; /* max byte length */
4384 OnigOptionType options
;
4385 OnigCaseFoldType case_fold_flag
;
4395 MinMaxLen mmd
; /* info position */
4399 int ignore_case
; /* -1: unset, 0: case sensitive, 1: ignore case */
4401 UChar s
[OPT_EXACT_MAXLEN
];
4405 MinMaxLen mmd
; /* info position */
4408 int value
; /* weighted value */
4409 UChar map
[ONIG_CHAR_TABLE_SIZE
];
4416 OptExactInfo exb
; /* boundary */
4417 OptExactInfo exm
; /* middle */
4418 OptExactInfo expr
; /* prec read (?=...) */
4420 OptMapInfo map
; /* boundary */
4425 map_position_value(OnigEncoding enc
, int i
)
4427 static const short int ByteValTable
[] = {
4428 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4429 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4430 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4431 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4432 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4433 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4434 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4435 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4438 if (i
< numberof(ByteValTable
)) {
4439 if (i
== 0 && ONIGENC_MBC_MINLEN(enc
) > 1)
4442 return (int )ByteValTable
[i
];
4445 return 4; /* Take it easy. */
4449 distance_value(MinMaxLen
* mm
)
4451 /* 1000 / (min-max-dist + 1) */
4452 static const short int dist_vals
[] = {
4453 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4454 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4455 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4456 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4457 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4458 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4459 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4460 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4461 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4462 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4467 if (mm
->max
== ONIG_INFINITE_DISTANCE
) return 0;
4469 d
= mm
->max
- mm
->min
;
4470 if (d
< numberof(dist_vals
))
4471 /* return dist_vals[d] * 16 / (mm->min + 12); */
4472 return (int )dist_vals
[d
];
4478 comp_distance_value(MinMaxLen
* d1
, MinMaxLen
* d2
, int v1
, int v2
)
4480 if (v2
<= 0) return -1;
4481 if (v1
<= 0) return 1;
4483 v1
*= distance_value(d1
);
4484 v2
*= distance_value(d2
);
4486 if (v2
> v1
) return 1;
4487 if (v2
< v1
) return -1;
4489 if (d2
->min
< d1
->min
) return 1;
4490 if (d2
->min
> d1
->min
) return -1;
4495 is_equal_mml(MinMaxLen
* a
, MinMaxLen
* b
)
4497 return (a
->min
== b
->min
&& a
->max
== b
->max
) ? 1 : 0;
4502 set_mml(MinMaxLen
* mml
, OnigDistance min
, OnigDistance max
)
4509 clear_mml(MinMaxLen
* mml
)
4511 mml
->min
= mml
->max
= 0;
4515 copy_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4517 to
->min
= from
->min
;
4518 to
->max
= from
->max
;
4522 add_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4524 to
->min
= distance_add(to
->min
, from
->min
);
4525 to
->max
= distance_add(to
->max
, from
->max
);
4530 add_len_mml(MinMaxLen
* to
, OnigDistance len
)
4532 to
->min
= distance_add(to
->min
, len
);
4533 to
->max
= distance_add(to
->max
, len
);
4538 alt_merge_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4540 if (to
->min
> from
->min
) to
->min
= from
->min
;
4541 if (to
->max
< from
->max
) to
->max
= from
->max
;
4545 copy_opt_env(OptEnv
* to
, OptEnv
* from
)
4551 clear_opt_anc_info(OptAncInfo
* anc
)
4553 anc
->left_anchor
= 0;
4554 anc
->right_anchor
= 0;
4558 copy_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* from
)
4564 concat_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* left
, OptAncInfo
* right
,
4565 OnigDistance left_len
, OnigDistance right_len
)
4567 clear_opt_anc_info(to
);
4569 to
->left_anchor
= left
->left_anchor
;
4570 if (left_len
== 0) {
4571 to
->left_anchor
|= right
->left_anchor
;
4574 to
->right_anchor
= right
->right_anchor
;
4575 if (right_len
== 0) {
4576 to
->right_anchor
|= left
->right_anchor
;
4579 to
->right_anchor
|= (left
->right_anchor
& ANCHOR_PREC_READ_NOT
);
4584 is_left_anchor(int anc
)
4586 if (anc
== ANCHOR_END_BUF
|| anc
== ANCHOR_SEMI_END_BUF
||
4587 anc
== ANCHOR_END_LINE
|| anc
== ANCHOR_PREC_READ
||
4588 anc
== ANCHOR_PREC_READ_NOT
)
4595 is_set_opt_anc_info(OptAncInfo
* to
, int anc
)
4597 if ((to
->left_anchor
& anc
) != 0) return 1;
4599 return ((to
->right_anchor
& anc
) != 0 ? 1 : 0);
4603 add_opt_anc_info(OptAncInfo
* to
, int anc
)
4605 if (is_left_anchor(anc
))
4606 to
->left_anchor
|= anc
;
4608 to
->right_anchor
|= anc
;
4612 remove_opt_anc_info(OptAncInfo
* to
, int anc
)
4614 if (is_left_anchor(anc
))
4615 to
->left_anchor
&= ~anc
;
4617 to
->right_anchor
&= ~anc
;
4621 alt_merge_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* add
)
4623 to
->left_anchor
&= add
->left_anchor
;
4624 to
->right_anchor
&= add
->right_anchor
;
4628 is_full_opt_exact_info(OptExactInfo
* ex
)
4630 return (ex
->len
>= OPT_EXACT_MAXLEN
? 1 : 0);
4634 clear_opt_exact_info(OptExactInfo
* ex
)
4636 clear_mml(&ex
->mmd
);
4637 clear_opt_anc_info(&ex
->anc
);
4639 ex
->ignore_case
= -1; /* unset */
4645 copy_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* from
)
4651 concat_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OnigEncoding enc
)
4657 if (to
->ignore_case
< 0)
4658 to
->ignore_case
= add
->ignore_case
;
4659 else if (to
->ignore_case
!= add
->ignore_case
)
4660 return ; /* avoid */
4664 for (i
= to
->len
; p
< end
; ) {
4665 len
= enclen(enc
, p
, end
);
4666 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4667 for (j
= 0; j
< len
&& p
< end
; j
++)
4672 to
->reach_end
= (p
== end
? add
->reach_end
: 0);
4674 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, 1, 1);
4675 if (! to
->reach_end
) tanc
.right_anchor
= 0;
4676 copy_opt_anc_info(&to
->anc
, &tanc
);
4680 concat_opt_exact_info_str(OptExactInfo
* to
, UChar
* s
, UChar
* end
,
4681 int raw ARG_UNUSED
, OnigEncoding enc
)
4686 for (i
= to
->len
, p
= s
; p
< end
&& i
< OPT_EXACT_MAXLEN
; ) {
4687 len
= enclen(enc
, p
, end
);
4688 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4689 for (j
= 0; j
< len
&& p
< end
; j
++)
4697 alt_merge_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OptEnv
* env
)
4701 if (add
->len
== 0 || to
->len
== 0) {
4702 clear_opt_exact_info(to
);
4706 if (! is_equal_mml(&to
->mmd
, &add
->mmd
)) {
4707 clear_opt_exact_info(to
);
4711 for (i
= 0; i
< to
->len
&& i
< add
->len
; ) {
4712 if (to
->s
[i
] != add
->s
[i
]) break;
4713 len
= enclen(env
->enc
, to
->s
+ i
, to
->s
+ to
->len
);
4715 for (j
= 1; j
< len
; j
++) {
4716 if (to
->s
[i
+j
] != add
->s
[i
+j
]) break;
4722 if (! add
->reach_end
|| i
< add
->len
|| i
< to
->len
) {
4726 if (to
->ignore_case
< 0)
4727 to
->ignore_case
= add
->ignore_case
;
4728 else if (add
->ignore_case
>= 0)
4729 to
->ignore_case
|= add
->ignore_case
;
4731 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4732 if (! to
->reach_end
) to
->anc
.right_anchor
= 0;
4736 select_opt_exact_info(OnigEncoding enc
, OptExactInfo
* now
, OptExactInfo
* alt
)
4747 copy_opt_exact_info(now
, alt
);
4750 else if (v1
<= 2 && v2
<= 2) {
4751 /* ByteValTable[x] is big value --> low price */
4752 v2
= map_position_value(enc
, now
->s
[0]);
4753 v1
= map_position_value(enc
, alt
->s
[0]);
4755 if (now
->len
> 1) v1
+= 5;
4756 if (alt
->len
> 1) v2
+= 5;
4759 if (now
->ignore_case
<= 0) v1
*= 2;
4760 if (alt
->ignore_case
<= 0) v2
*= 2;
4762 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4763 copy_opt_exact_info(now
, alt
);
4767 clear_opt_map_info(OptMapInfo
* map
)
4769 static const OptMapInfo clean_info
= {
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4777 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4791 xmemcpy(map
, &clean_info
, sizeof(OptMapInfo
));
4795 copy_opt_map_info(OptMapInfo
* to
, OptMapInfo
* from
)
4801 add_char_opt_map_info(OptMapInfo
* map
, UChar c
, OnigEncoding enc
)
4803 if (map
->map
[c
] == 0) {
4805 map
->value
+= map_position_value(enc
, c
);
4810 add_char_amb_opt_map_info(OptMapInfo
* map
, UChar
* p
, UChar
* end
,
4811 OnigEncoding enc
, OnigCaseFoldType case_fold_flag
)
4813 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4814 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
4817 add_char_opt_map_info(map
, p
[0], enc
);
4819 case_fold_flag
= DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag
);
4820 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, case_fold_flag
, p
, end
, items
);
4821 if (n
< 0) return n
;
4823 for (i
= 0; i
< n
; i
++) {
4824 ONIGENC_CODE_TO_MBC(enc
, items
[i
].code
[0], buf
);
4825 add_char_opt_map_info(map
, buf
[0], enc
);
4832 select_opt_map_info(OptMapInfo
* now
, OptMapInfo
* alt
)
4834 const int z
= 1<<15; /* 32768: something big value */
4838 if (alt
->value
== 0) return ;
4839 if (now
->value
== 0) {
4840 copy_opt_map_info(now
, alt
);
4844 v1
= z
/ now
->value
;
4845 v2
= z
/ alt
->value
;
4846 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4847 copy_opt_map_info(now
, alt
);
4851 comp_opt_exact_or_map_info(OptExactInfo
* e
, OptMapInfo
* m
)
4853 #define COMP_EM_BASE 20
4856 if (m
->value
<= 0) return -1;
4858 ve
= COMP_EM_BASE
* e
->len
* (e
->ignore_case
> 0 ? 1 : 2);
4859 vm
= COMP_EM_BASE
* 5 * 2 / m
->value
;
4860 return comp_distance_value(&e
->mmd
, &m
->mmd
, ve
, vm
);
4864 alt_merge_opt_map_info(OnigEncoding enc
, OptMapInfo
* to
, OptMapInfo
* add
)
4868 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4869 if (to
->value
== 0) return ;
4870 if (add
->value
== 0 || to
->mmd
.max
< add
->mmd
.min
) {
4871 clear_opt_map_info(to
);
4875 alt_merge_mml(&to
->mmd
, &add
->mmd
);
4878 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
4883 val
+= map_position_value(enc
, i
);
4887 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4891 set_bound_node_opt_info(NodeOptInfo
* opt
, MinMaxLen
* mmd
)
4893 copy_mml(&(opt
->exb
.mmd
), mmd
);
4894 copy_mml(&(opt
->expr
.mmd
), mmd
);
4895 copy_mml(&(opt
->map
.mmd
), mmd
);
4899 clear_node_opt_info(NodeOptInfo
* opt
)
4901 clear_mml(&opt
->len
);
4902 clear_opt_anc_info(&opt
->anc
);
4903 clear_opt_exact_info(&opt
->exb
);
4904 clear_opt_exact_info(&opt
->exm
);
4905 clear_opt_exact_info(&opt
->expr
);
4906 clear_opt_map_info(&opt
->map
);
4910 copy_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* from
)
4916 concat_left_node_opt_info(OnigEncoding enc
, NodeOptInfo
* to
, NodeOptInfo
* add
)
4918 int exb_reach
, exm_reach
;
4921 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, to
->len
.max
, add
->len
.max
);
4922 copy_opt_anc_info(&to
->anc
, &tanc
);
4924 if (add
->exb
.len
> 0 && to
->len
.max
== 0) {
4925 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->exb
.anc
,
4926 to
->len
.max
, add
->len
.max
);
4927 copy_opt_anc_info(&add
->exb
.anc
, &tanc
);
4930 if (add
->map
.value
> 0 && to
->len
.max
== 0) {
4931 if (add
->map
.mmd
.max
== 0)
4932 add
->map
.anc
.left_anchor
|= to
->anc
.left_anchor
;
4935 exb_reach
= to
->exb
.reach_end
;
4936 exm_reach
= to
->exm
.reach_end
;
4938 if (add
->len
.max
!= 0)
4939 to
->exb
.reach_end
= to
->exm
.reach_end
= 0;
4941 if (add
->exb
.len
> 0) {
4943 concat_opt_exact_info(&to
->exb
, &add
->exb
, enc
);
4944 clear_opt_exact_info(&add
->exb
);
4946 else if (exm_reach
) {
4947 concat_opt_exact_info(&to
->exm
, &add
->exb
, enc
);
4948 clear_opt_exact_info(&add
->exb
);
4951 select_opt_exact_info(enc
, &to
->exm
, &add
->exb
);
4952 select_opt_exact_info(enc
, &to
->exm
, &add
->exm
);
4954 if (to
->expr
.len
> 0) {
4955 if (add
->len
.max
> 0) {
4956 if (to
->expr
.len
> (int )add
->len
.max
)
4957 to
->expr
.len
= (int )add
->len
.max
;
4959 if (to
->expr
.mmd
.max
== 0)
4960 select_opt_exact_info(enc
, &to
->exb
, &to
->expr
);
4962 select_opt_exact_info(enc
, &to
->exm
, &to
->expr
);
4965 else if (add
->expr
.len
> 0) {
4966 copy_opt_exact_info(&to
->expr
, &add
->expr
);
4969 select_opt_map_info(&to
->map
, &add
->map
);
4971 add_mml(&to
->len
, &add
->len
);
4975 alt_merge_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* add
, OptEnv
* env
)
4977 alt_merge_opt_anc_info (&to
->anc
, &add
->anc
);
4978 alt_merge_opt_exact_info(&to
->exb
, &add
->exb
, env
);
4979 alt_merge_opt_exact_info(&to
->exm
, &add
->exm
, env
);
4980 alt_merge_opt_exact_info(&to
->expr
, &add
->expr
, env
);
4981 alt_merge_opt_map_info(env
->enc
, &to
->map
, &add
->map
);
4983 alt_merge_mml(&to
->len
, &add
->len
);
4987 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4990 optimize_node_left(Node
* node
, NodeOptInfo
* opt
, OptEnv
* env
)
4995 clear_node_opt_info(opt
);
4996 set_bound_node_opt_info(opt
, &env
->mmd
);
5006 copy_opt_env(&nenv
, env
);
5008 r
= optimize_node_left(NCAR(nd
), &nopt
, &nenv
);
5010 add_mml(&nenv
.mmd
, &nopt
.len
);
5011 concat_left_node_opt_info(env
->enc
, opt
, &nopt
);
5013 } while (r
== 0 && IS_NOT_NULL(nd
= NCDR(nd
)));
5023 r
= optimize_node_left(NCAR(nd
), &nopt
, env
);
5025 if (nd
== node
) copy_node_opt_info(opt
, &nopt
);
5026 else alt_merge_node_opt_info(opt
, &nopt
, env
);
5028 } while ((r
== 0) && IS_NOT_NULL(nd
= NCDR(nd
)));
5034 StrNode
* sn
= NSTR(node
);
5035 OnigDistance slen
= sn
->end
- sn
->s
;
5036 int is_raw
= NSTRING_IS_RAW(node
);
5038 if (! NSTRING_IS_AMBIG(node
)) {
5039 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
5041 opt
->exb
.ignore_case
= 0;
5043 add_char_opt_map_info(&opt
->map
, *(sn
->s
), env
->enc
);
5045 set_mml(&opt
->len
, slen
, slen
);
5050 if (NSTRING_IS_DONT_GET_OPT_INFO(node
)) {
5051 int n
= onigenc_strlen(env
->enc
, sn
->s
, sn
->end
);
5052 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
) * (OnigDistance
)n
;
5055 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
5057 opt
->exb
.ignore_case
= 1;
5060 r
= add_char_amb_opt_map_info(&opt
->map
, sn
->s
, sn
->end
,
5061 env
->enc
, env
->case_fold_flag
);
5068 set_mml(&opt
->len
, slen
, max
);
5071 if ((OnigDistance
)opt
->exb
.len
== slen
)
5072 opt
->exb
.reach_end
= 1;
5079 CClassNode
* cc
= NCCLASS(node
);
5081 /* no need to check ignore case. (set in setup_tree()) */
5083 if (IS_NOT_NULL(cc
->mbuf
) || IS_NCCLASS_NOT(cc
)) {
5084 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
5085 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
5087 set_mml(&opt
->len
, min
, max
);
5090 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
5091 z
= BITSET_AT(cc
->bs
, i
);
5092 if ((z
&& !IS_NCCLASS_NOT(cc
)) || (!z
&& IS_NCCLASS_NOT(cc
))) {
5093 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
5096 set_mml(&opt
->len
, 1, 1);
5106 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
5111 maxcode
= NCTYPE(node
)->ascii_range
? 0x80 : SINGLE_BYTE_SIZE
;
5112 switch (NCTYPE(node
)->ctype
) {
5113 case ONIGENC_CTYPE_WORD
:
5114 if (NCTYPE(node
)->not != 0) {
5115 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
5116 if (! ONIGENC_IS_CODE_WORD(env
->enc
, i
) || i
>= maxcode
) {
5117 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
5122 for (i
= 0; i
< maxcode
; i
++) {
5123 if (ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
5124 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
5132 min
= ONIGENC_MBC_MINLEN(env
->enc
);
5134 set_mml(&opt
->len
, min
, max
);
5140 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
5141 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
5142 set_mml(&opt
->len
, min
, max
);
5147 switch (NANCHOR(node
)->type
) {
5148 case ANCHOR_BEGIN_BUF
:
5149 case ANCHOR_BEGIN_POSITION
:
5150 case ANCHOR_BEGIN_LINE
:
5151 case ANCHOR_END_BUF
:
5152 case ANCHOR_SEMI_END_BUF
:
5153 case ANCHOR_END_LINE
:
5154 case ANCHOR_LOOK_BEHIND
: /* just for (?<=x).* */
5155 case ANCHOR_PREC_READ_NOT
: /* just for (?!x).* */
5156 add_opt_anc_info(&opt
->anc
, NANCHOR(node
)->type
);
5159 case ANCHOR_PREC_READ
:
5163 r
= optimize_node_left(NANCHOR(node
)->target
, &nopt
, env
);
5165 if (nopt
.exb
.len
> 0)
5166 copy_opt_exact_info(&opt
->expr
, &nopt
.exb
);
5167 else if (nopt
.exm
.len
> 0)
5168 copy_opt_exact_info(&opt
->expr
, &nopt
.exm
);
5170 opt
->expr
.reach_end
= 0;
5172 if (nopt
.map
.value
> 0)
5173 copy_opt_map_info(&opt
->map
, &nopt
.map
);
5178 case ANCHOR_LOOK_BEHIND_NOT
:
5187 OnigDistance min
, max
, tmin
, tmax
;
5188 Node
** nodes
= SCANENV_MEM_NODES(env
->scan_env
);
5189 BRefNode
* br
= NBREF(node
);
5191 if (br
->state
& NST_RECURSION
) {
5192 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
5195 backs
= BACKREFS_P(br
);
5196 r
= get_min_match_length(nodes
[backs
[0]], &min
, env
->scan_env
);
5198 r
= get_max_match_length(nodes
[backs
[0]], &max
, env
->scan_env
);
5200 for (i
= 1; i
< br
->back_num
; i
++) {
5201 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
->scan_env
);
5203 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
->scan_env
);
5205 if (min
> tmin
) min
= tmin
;
5206 if (max
< tmax
) max
= tmax
;
5208 if (r
== 0) set_mml(&opt
->len
, min
, max
);
5212 #ifdef USE_SUBEXP_CALL
5214 if (IS_CALL_RECURSION(NCALL(node
)))
5215 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
5217 OnigOptionType save
= env
->options
;
5218 env
->options
= NENCLOSE(NCALL(node
)->target
)->option
;
5219 r
= optimize_node_left(NCALL(node
)->target
, opt
, env
);
5220 env
->options
= save
;
5228 OnigDistance min
, max
;
5230 QtfrNode
* qn
= NQTFR(node
);
5232 r
= optimize_node_left(qn
->target
, &nopt
, env
);
5235 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn
->upper
)) {
5236 if (env
->mmd
.max
== 0 &&
5237 NTYPE(qn
->target
) == NT_CANY
&& qn
->greedy
) {
5238 if (IS_MULTILINE(env
->options
))
5239 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5240 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_ML
);
5242 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR
);
5246 if (qn
->lower
> 0) {
5247 copy_node_opt_info(opt
, &nopt
);
5248 if (nopt
.exb
.len
> 0) {
5249 if (nopt
.exb
.reach_end
) {
5250 for (i
= 2; i
<= qn
->lower
&&
5251 ! is_full_opt_exact_info(&opt
->exb
); i
++) {
5252 concat_opt_exact_info(&opt
->exb
, &nopt
.exb
, env
->enc
);
5254 if (i
< qn
->lower
) {
5255 opt
->exb
.reach_end
= 0;
5260 if (qn
->lower
!= qn
->upper
) {
5261 opt
->exb
.reach_end
= 0;
5262 opt
->exm
.reach_end
= 0;
5265 opt
->exm
.reach_end
= 0;
5269 min
= distance_multiply(nopt
.len
.min
, qn
->lower
);
5270 if (IS_REPEAT_INFINITE(qn
->upper
))
5271 max
= (nopt
.len
.max
> 0 ? ONIG_INFINITE_DISTANCE
: 0);
5273 max
= distance_multiply(nopt
.len
.max
, qn
->upper
);
5275 set_mml(&opt
->len
, min
, max
);
5281 EncloseNode
* en
= NENCLOSE(node
);
5284 case ENCLOSE_OPTION
:
5286 OnigOptionType save
= env
->options
;
5288 env
->options
= en
->option
;
5289 r
= optimize_node_left(en
->target
, opt
, env
);
5290 env
->options
= save
;
5294 case ENCLOSE_MEMORY
:
5295 #ifdef USE_SUBEXP_CALL
5297 if (en
->opt_count
> MAX_NODE_OPT_INFO_REF_COUNT
) {
5298 OnigDistance min
, max
;
5301 max
= ONIG_INFINITE_DISTANCE
;
5302 if (IS_ENCLOSE_MIN_FIXED(en
)) min
= en
->min_len
;
5303 if (IS_ENCLOSE_MAX_FIXED(en
)) max
= en
->max_len
;
5304 set_mml(&opt
->len
, min
, max
);
5309 r
= optimize_node_left(en
->target
, opt
, env
);
5311 if (is_set_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
)) {
5312 if (BIT_STATUS_AT(env
->scan_env
->backrefed_mem
, en
->regnum
))
5313 remove_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
);
5318 case ENCLOSE_STOP_BACKTRACK
:
5319 case ENCLOSE_CONDITION
:
5320 r
= optimize_node_left(en
->target
, opt
, env
);
5323 case ENCLOSE_ABSENT
:
5324 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
5332 fprintf(stderr
, "optimize_node_left: undefined node type %d\n",
5335 r
= ONIGERR_TYPE_BUG
;
5343 set_optimize_exact_info(regex_t
* reg
, OptExactInfo
* e
)
5348 if (e
->len
== 0) return 0;
5350 reg
->exact
= (UChar
* )xmalloc(e
->len
);
5351 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
5352 xmemcpy(reg
->exact
, e
->s
, e
->len
);
5353 reg
->exact_end
= reg
->exact
+ e
->len
;
5356 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg
->enc
, reg
->exact
, reg
->exact_end
);
5358 if (e
->ignore_case
> 0) {
5359 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
5360 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
,
5361 reg
->map
, &(reg
->int_map
), 1);
5363 reg
->optimize
= (allow_reverse
!= 0
5364 ? ONIG_OPTIMIZE_EXACT_BM_IC
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
);
5367 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
5371 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
5375 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
5376 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
,
5377 reg
->map
, &(reg
->int_map
), 0);
5379 reg
->optimize
= (allow_reverse
!= 0
5380 ? ONIG_OPTIMIZE_EXACT_BM
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV
);
5383 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
5387 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
5391 reg
->dmin
= e
->mmd
.min
;
5392 reg
->dmax
= e
->mmd
.max
;
5394 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
5395 reg
->threshold_len
= (int )(reg
->dmin
+ (reg
->exact_end
- reg
->exact
));
5402 set_optimize_map_info(regex_t
* reg
, OptMapInfo
* m
)
5406 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5407 reg
->map
[i
] = m
->map
[i
];
5409 reg
->optimize
= ONIG_OPTIMIZE_MAP
;
5410 reg
->dmin
= m
->mmd
.min
;
5411 reg
->dmax
= m
->mmd
.max
;
5413 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
5414 reg
->threshold_len
= (int )(reg
->dmin
+ 1);
5419 set_sub_anchor(regex_t
* reg
, OptAncInfo
* anc
)
5421 reg
->sub_anchor
|= anc
->left_anchor
& ANCHOR_BEGIN_LINE
;
5422 reg
->sub_anchor
|= anc
->right_anchor
& ANCHOR_END_LINE
;
5425 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5426 static void print_optimize_info(FILE* f
, regex_t
* reg
);
5430 set_optimize_info_from_tree(Node
* node
, regex_t
* reg
, ScanEnv
* scan_env
)
5438 env
.options
= reg
->options
;
5439 env
.case_fold_flag
= reg
->case_fold_flag
;
5440 env
.scan_env
= scan_env
;
5441 clear_mml(&env
.mmd
);
5443 r
= optimize_node_left(node
, &opt
, &env
);
5446 reg
->anchor
= opt
.anc
.left_anchor
& (ANCHOR_BEGIN_BUF
|
5447 ANCHOR_BEGIN_POSITION
| ANCHOR_ANYCHAR_STAR
| ANCHOR_ANYCHAR_STAR_ML
|
5448 ANCHOR_LOOK_BEHIND
);
5450 if ((opt
.anc
.left_anchor
& (ANCHOR_LOOK_BEHIND
| ANCHOR_PREC_READ_NOT
)) != 0)
5451 reg
->anchor
&= ~ANCHOR_ANYCHAR_STAR_ML
;
5453 reg
->anchor
|= opt
.anc
.right_anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
|
5454 ANCHOR_PREC_READ_NOT
);
5456 if (reg
->anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
)) {
5457 reg
->anchor_dmin
= opt
.len
.min
;
5458 reg
->anchor_dmax
= opt
.len
.max
;
5461 if (opt
.exb
.len
> 0 || opt
.exm
.len
> 0) {
5462 select_opt_exact_info(reg
->enc
, &opt
.exb
, &opt
.exm
);
5463 if (opt
.map
.value
> 0 &&
5464 comp_opt_exact_or_map_info(&opt
.exb
, &opt
.map
) > 0) {
5468 r
= set_optimize_exact_info(reg
, &opt
.exb
);
5469 set_sub_anchor(reg
, &opt
.exb
.anc
);
5472 else if (opt
.map
.value
> 0) {
5474 set_optimize_map_info(reg
, &opt
.map
);
5475 set_sub_anchor(reg
, &opt
.map
.anc
);
5478 reg
->sub_anchor
|= opt
.anc
.left_anchor
& ANCHOR_BEGIN_LINE
;
5479 if (opt
.len
.max
== 0)
5480 reg
->sub_anchor
|= opt
.anc
.right_anchor
& ANCHOR_END_LINE
;
5483 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5484 print_optimize_info(stderr
, reg
);
5490 clear_optimize_info(regex_t
* reg
)
5492 reg
->optimize
= ONIG_OPTIMIZE_NONE
;
5494 reg
->anchor_dmin
= 0;
5495 reg
->anchor_dmax
= 0;
5496 reg
->sub_anchor
= 0;
5497 reg
->exact_end
= (UChar
* )NULL
;
5498 reg
->threshold_len
= 0;
5500 reg
->exact
= (UChar
* )NULL
;
5505 static void print_enc_string(FILE* fp
, OnigEncoding enc
,
5506 const UChar
*s
, const UChar
*end
)
5508 fprintf(fp
, "\nPATTERN: /");
5510 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5516 code
= ONIGENC_MBC_TO_CODE(enc
, p
, end
);
5518 fprintf(fp
, " 0x%04x ", (int )code
);
5521 fputc((int )code
, fp
);
5524 p
+= enclen(enc
, p
, end
);
5529 fputc((int )*s
, fp
);
5534 fprintf(fp
, "/ (%s)\n", enc
->name
);
5536 #endif /* ONIG_DEBUG */
5538 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5540 print_distance_range(FILE* f
, OnigDistance a
, OnigDistance b
)
5542 if (a
== ONIG_INFINITE_DISTANCE
)
5545 fprintf(f
, "(%"PRIuPTR
")", a
);
5549 if (b
== ONIG_INFINITE_DISTANCE
)
5552 fprintf(f
, "(%"PRIuPTR
")", b
);
5556 print_anchor(FILE* f
, int anchor
)
5562 if (anchor
& ANCHOR_BEGIN_BUF
) {
5563 fprintf(f
, "begin-buf");
5566 if (anchor
& ANCHOR_BEGIN_LINE
) {
5567 if (q
) fprintf(f
, ", ");
5569 fprintf(f
, "begin-line");
5571 if (anchor
& ANCHOR_BEGIN_POSITION
) {
5572 if (q
) fprintf(f
, ", ");
5574 fprintf(f
, "begin-pos");
5576 if (anchor
& ANCHOR_END_BUF
) {
5577 if (q
) fprintf(f
, ", ");
5579 fprintf(f
, "end-buf");
5581 if (anchor
& ANCHOR_SEMI_END_BUF
) {
5582 if (q
) fprintf(f
, ", ");
5584 fprintf(f
, "semi-end-buf");
5586 if (anchor
& ANCHOR_END_LINE
) {
5587 if (q
) fprintf(f
, ", ");
5589 fprintf(f
, "end-line");
5591 if (anchor
& ANCHOR_ANYCHAR_STAR
) {
5592 if (q
) fprintf(f
, ", ");
5594 fprintf(f
, "anychar-star");
5596 if (anchor
& ANCHOR_ANYCHAR_STAR_ML
) {
5597 if (q
) fprintf(f
, ", ");
5598 fprintf(f
, "anychar-star-ml");
5605 print_optimize_info(FILE* f
, regex_t
* reg
)
5607 static const char* on
[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5609 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5611 fprintf(f
, "optimize: %s\n", on
[reg
->optimize
]);
5612 fprintf(f
, " anchor: "); print_anchor(f
, reg
->anchor
);
5613 if ((reg
->anchor
& ANCHOR_END_BUF_MASK
) != 0)
5614 print_distance_range(f
, reg
->anchor_dmin
, reg
->anchor_dmax
);
5617 if (reg
->optimize
) {
5618 fprintf(f
, " sub anchor: "); print_anchor(f
, reg
->sub_anchor
);
5625 fprintf(f
, "exact: [");
5626 for (p
= reg
->exact
; p
< reg
->exact_end
; p
++) {
5629 fprintf(f
, "]: length: %"PRIdPTR
"\n", (reg
->exact_end
- reg
->exact
));
5631 else if (reg
->optimize
& ONIG_OPTIMIZE_MAP
) {
5634 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5635 if (reg
->map
[i
]) n
++;
5637 fprintf(f
, "map: n=%d\n", n
);
5641 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5642 if (reg
->map
[i
] != 0) {
5643 if (c
> 0) fputs(", ", f
);
5645 if (ONIGENC_MBC_MAXLEN(reg
->enc
) == 1 &&
5646 ONIGENC_IS_CODE_PRINT(reg
->enc
, (OnigCodePoint
)i
))
5649 fprintf(f
, "%d", i
);
5656 #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5660 onig_free_body(regex_t
* reg
)
5662 if (IS_NOT_NULL(reg
)) {
5665 xfree(reg
->int_map
);
5666 xfree(reg
->int_map_backward
);
5667 xfree(reg
->repeat_range
);
5668 onig_free(reg
->chain
);
5670 #ifdef USE_NAMED_GROUP
5671 onig_names_free(reg
);
5677 onig_free(regex_t
* reg
)
5679 if (IS_NOT_NULL(reg
)) {
5680 onig_free_body(reg
);
5686 dup_copy(const void *ptr
, size_t size
)
5688 void *newptr
= xmalloc(size
);
5689 if (IS_NOT_NULL(newptr
)) {
5690 memcpy(newptr
, ptr
, size
);
5696 onig_reg_copy(regex_t
** nreg
, regex_t
* oreg
)
5698 if (IS_NOT_NULL(oreg
)) {
5699 regex_t
*reg
= *nreg
= (regex_t
* )xmalloc(sizeof(regex_t
));
5700 if (IS_NULL(reg
)) return ONIGERR_MEMORY
;
5704 # define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5706 if (IS_NOT_NULL(reg
->exact
)) {
5707 size_t exact_size
= reg
->exact_end
- reg
->exact
;
5708 if (COPY_FAILED(exact
, exact_size
))
5710 (reg
)->exact_end
= (reg
)->exact
+ exact_size
;
5713 if (IS_NOT_NULL(reg
->int_map
)) {
5714 if (COPY_FAILED(int_map
, sizeof(int) * ONIG_CHAR_TABLE_SIZE
))
5717 if (IS_NOT_NULL(reg
->int_map_backward
)) {
5718 if (COPY_FAILED(int_map_backward
, sizeof(int) * ONIG_CHAR_TABLE_SIZE
))
5719 goto err_int_map_backward
;
5721 if (IS_NOT_NULL(reg
->p
)) {
5722 if (COPY_FAILED(p
, reg
->alloc
))
5725 if (IS_NOT_NULL(reg
->repeat_range
)) {
5726 if (COPY_FAILED(repeat_range
, reg
->repeat_range_alloc
* sizeof(OnigRepeatRange
)))
5727 goto err_repeat_range
;
5729 if (IS_NOT_NULL(reg
->name_table
)) {
5730 if (onig_names_copy(reg
, oreg
))
5731 goto err_name_table
;
5733 if (IS_NOT_NULL(reg
->chain
)) {
5734 if (onig_reg_copy(®
->chain
, reg
->chain
))
5741 onig_names_free(reg
);
5743 xfree(reg
->repeat_range
);
5747 xfree(reg
->int_map_backward
);
5748 err_int_map_backward
:
5749 xfree(reg
->int_map
);
5754 return ONIGERR_MEMORY
;
5761 onig_memsize(const regex_t
*reg
)
5763 size_t size
= sizeof(regex_t
);
5764 if (IS_NULL(reg
)) return 0;
5765 if (IS_NOT_NULL(reg
->p
)) size
+= reg
->alloc
;
5766 if (IS_NOT_NULL(reg
->exact
)) size
+= reg
->exact_end
- reg
->exact
;
5767 if (IS_NOT_NULL(reg
->int_map
)) size
+= sizeof(int) * ONIG_CHAR_TABLE_SIZE
;
5768 if (IS_NOT_NULL(reg
->int_map_backward
)) size
+= sizeof(int) * ONIG_CHAR_TABLE_SIZE
;
5769 if (IS_NOT_NULL(reg
->repeat_range
)) size
+= reg
->repeat_range_alloc
* sizeof(OnigRepeatRange
);
5770 if (IS_NOT_NULL(reg
->chain
)) size
+= onig_memsize(reg
->chain
);
5776 onig_region_memsize(const OnigRegion
*regs
)
5778 size_t size
= sizeof(*regs
);
5779 if (IS_NULL(regs
)) return 0;
5780 size
+= regs
->allocated
* (sizeof(*regs
->beg
) + sizeof(*regs
->end
));
5785 #define REGEX_TRANSFER(to,from) do {\
5786 onig_free_body(to);\
5787 xmemcpy(to, from, sizeof(regex_t));\
5793 onig_transfer(regex_t
* to
, regex_t
* from
)
5795 REGEX_TRANSFER(to
, from
);
5799 #ifdef ONIG_DEBUG_COMPILE
5800 static void print_compiled_byte_code_list(FILE* f
, regex_t
* reg
);
5802 #ifdef ONIG_DEBUG_PARSE_TREE
5803 static void print_tree(FILE* f
, Node
* node
);
5808 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5809 OnigErrorInfo
* einfo
)
5811 return onig_compile_ruby(reg
, pattern
, pattern_end
, einfo
, NULL
, 0);
5817 onig_compile_ruby(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5818 OnigErrorInfo
* einfo
, const char *sourcefile
, int sourceline
)
5821 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5822 OnigErrorInfo
* einfo
)
5825 #define COMPILE_INIT_SIZE 20
5828 OnigDistance init_size
;
5830 ScanEnv scan_env
= {0};
5831 #ifdef USE_SUBEXP_CALL
5832 UnsetAddrList uslist
;
5835 if (IS_NOT_NULL(einfo
)) einfo
->par
= (UChar
* )NULL
;
5838 scan_env
.sourcefile
= sourcefile
;
5839 scan_env
.sourceline
= sourceline
;
5843 print_enc_string(stderr
, reg
->enc
, pattern
, pattern_end
);
5846 if (reg
->alloc
== 0) {
5847 init_size
= (pattern_end
- pattern
) * 2;
5848 if (init_size
<= 0) init_size
= COMPILE_INIT_SIZE
;
5849 r
= BBUF_INIT(reg
, init_size
);
5850 if (r
!= 0) goto end
;
5856 reg
->num_repeat
= 0;
5857 reg
->num_null_check
= 0;
5858 reg
->repeat_range_alloc
= 0;
5859 reg
->repeat_range
= (OnigRepeatRange
* )NULL
;
5860 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5861 reg
->num_comb_exp_check
= 0;
5864 r
= onig_parse_make_tree(&root
, pattern
, pattern_end
, reg
, &scan_env
);
5865 if (r
!= 0) goto err
;
5867 #ifdef ONIG_DEBUG_PARSE_TREE
5869 fprintf(stderr
, "ORIGINAL PARSE TREE:\n");
5870 print_tree(stderr
, root
);
5874 #ifdef USE_NAMED_GROUP
5875 /* mixed use named group and no-named group */
5876 if (scan_env
.num_named
> 0 &&
5877 IS_SYNTAX_BV(scan_env
.syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
5878 !ONIG_IS_OPTION_ON(reg
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
5879 if (scan_env
.num_named
!= scan_env
.num_mem
)
5880 r
= disable_noname_group_capture(&root
, reg
, &scan_env
);
5882 r
= numbered_ref_check(root
);
5884 if (r
!= 0) goto err
;
5888 #ifdef USE_SUBEXP_CALL
5889 if (scan_env
.num_call
> 0) {
5890 r
= unset_addr_list_init(&uslist
, scan_env
.num_call
);
5891 if (r
!= 0) goto err
;
5892 scan_env
.unset_addr_list
= &uslist
;
5893 r
= setup_subexp_call(root
, &scan_env
);
5894 if (r
!= 0) goto err_unset
;
5895 r
= subexp_recursive_check_trav(root
, &scan_env
);
5896 if (r
< 0) goto err_unset
;
5897 r
= subexp_inf_recursive_check_trav(root
, &scan_env
);
5898 if (r
!= 0) goto err_unset
;
5900 reg
->num_call
= scan_env
.num_call
;
5906 r
= setup_tree(root
, reg
, 0, &scan_env
);
5907 if (r
!= 0) goto err_unset
;
5909 #ifdef ONIG_DEBUG_PARSE_TREE
5910 print_tree(stderr
, root
);
5913 reg
->capture_history
= scan_env
.capture_history
;
5914 reg
->bt_mem_start
= scan_env
.bt_mem_start
;
5915 reg
->bt_mem_start
|= reg
->capture_history
;
5916 if (IS_FIND_CONDITION(reg
->options
))
5917 BIT_STATUS_ON_ALL(reg
->bt_mem_end
);
5919 reg
->bt_mem_end
= scan_env
.bt_mem_end
;
5920 reg
->bt_mem_end
|= reg
->capture_history
;
5923 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5924 if (scan_env
.backrefed_mem
== 0
5925 # ifdef USE_SUBEXP_CALL
5926 || scan_env
.num_call
== 0
5929 setup_comb_exp_check(root
, 0, &scan_env
);
5930 # ifdef USE_SUBEXP_CALL
5931 if (scan_env
.has_recursion
!= 0) {
5932 scan_env
.num_comb_exp_check
= 0;
5936 if (scan_env
.comb_exp_max_regnum
> 0) {
5938 for (i
= 1; i
<= scan_env
.comb_exp_max_regnum
; i
++) {
5939 if (BIT_STATUS_AT(scan_env
.backrefed_mem
, i
) != 0) {
5940 scan_env
.num_comb_exp_check
= 0;
5947 reg
->num_comb_exp_check
= scan_env
.num_comb_exp_check
;
5950 clear_optimize_info(reg
);
5951 #ifndef ONIG_DONT_OPTIMIZE
5952 r
= set_optimize_info_from_tree(root
, reg
, &scan_env
);
5953 if (r
!= 0) goto err_unset
;
5956 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
)) {
5957 xfree(scan_env
.mem_nodes_dynamic
);
5958 scan_env
.mem_nodes_dynamic
= (Node
** )NULL
;
5961 r
= compile_tree(root
, reg
);
5963 r
= add_opcode(reg
, OP_END
);
5964 #ifdef USE_SUBEXP_CALL
5965 if (scan_env
.num_call
> 0) {
5966 r
= unset_addr_list_fix(&uslist
, reg
);
5967 unset_addr_list_end(&uslist
);
5972 if ((reg
->num_repeat
!= 0) || (reg
->bt_mem_end
!= 0))
5973 reg
->stack_pop_level
= STACK_POP_LEVEL_ALL
;
5975 if (reg
->bt_mem_start
!= 0)
5976 reg
->stack_pop_level
= STACK_POP_LEVEL_MEM_START
;
5978 reg
->stack_pop_level
= STACK_POP_LEVEL_FREE
;
5981 #ifdef USE_SUBEXP_CALL
5982 else if (scan_env
.num_call
> 0) {
5983 unset_addr_list_end(&uslist
);
5986 onig_node_free(root
);
5988 #ifdef ONIG_DEBUG_COMPILE
5989 # ifdef USE_NAMED_GROUP
5990 onig_print_names(stderr
, reg
);
5992 print_compiled_byte_code_list(stderr
, reg
);
5996 onig_reg_resize(reg
);
6000 #ifdef USE_SUBEXP_CALL
6001 if (scan_env
.num_call
> 0) {
6002 unset_addr_list_end(&uslist
);
6006 if (IS_NOT_NULL(scan_env
.error
)) {
6007 if (IS_NOT_NULL(einfo
)) {
6008 einfo
->enc
= scan_env
.enc
;
6009 einfo
->par
= scan_env
.error
;
6010 einfo
->par_end
= scan_env
.error_end
;
6014 onig_node_free(root
);
6015 xfree(scan_env
.mem_nodes_dynamic
);
6020 static int onig_inited
= 0;
6023 onig_reg_init(regex_t
* reg
, OnigOptionType option
,
6024 OnigCaseFoldType case_fold_flag
,
6025 OnigEncoding enc
, const OnigSyntaxType
* syntax
)
6031 return ONIGERR_INVALID_ARGUMENT
;
6033 if (ONIGENC_IS_UNDEF(enc
))
6034 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
;
6036 if ((option
& (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
))
6037 == (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
)) {
6038 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS
;
6041 if ((option
& ONIG_OPTION_NEGATE_SINGLELINE
) != 0) {
6042 option
|= syntax
->options
;
6043 option
&= ~ONIG_OPTION_SINGLELINE
;
6046 option
|= syntax
->options
;
6049 (reg
)->options
= option
;
6050 (reg
)->syntax
= syntax
;
6051 (reg
)->optimize
= 0;
6052 (reg
)->exact
= (UChar
* )NULL
;
6053 (reg
)->int_map
= (int* )NULL
;
6054 (reg
)->int_map_backward
= (int* )NULL
;
6055 (reg
)->chain
= (regex_t
* )NULL
;
6057 (reg
)->p
= (UChar
* )NULL
;
6060 (reg
)->name_table
= (void* )NULL
;
6062 (reg
)->case_fold_flag
= case_fold_flag
;
6064 (reg
)->timelimit
= 0;
6070 onig_new_without_alloc(regex_t
* reg
, const UChar
* pattern
,
6071 const UChar
* pattern_end
, OnigOptionType option
, OnigEncoding enc
,
6072 const OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
)
6076 r
= onig_reg_init(reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
6079 r
= onig_compile(reg
, pattern
, pattern_end
, einfo
);
6084 onig_new(regex_t
** reg
, const UChar
* pattern
, const UChar
* pattern_end
,
6085 OnigOptionType option
, OnigEncoding enc
, const OnigSyntaxType
* syntax
,
6086 OnigErrorInfo
* einfo
)
6088 *reg
= (regex_t
* )xmalloc(sizeof(regex_t
));
6089 if (IS_NULL(*reg
)) return ONIGERR_MEMORY
;
6091 int r
= onig_new_without_alloc(*reg
, pattern
, pattern_end
, option
, enc
, syntax
, einfo
);
6101 onig_initialize(OnigEncoding encodings
[] ARG_UNUSED
, int n ARG_UNUSED
)
6109 if (onig_inited
!= 0)
6114 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6115 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF
| _CRTDBG_LEAK_CHECK_DF
);
6119 /* onigenc_set_default_caseconv_table((UChar* )0); */
6121 #ifdef ONIG_DEBUG_STATISTICS
6122 onig_statistics_init();
6129 static OnigEndCallListItemType
* EndCallTop
;
6131 extern void onig_add_end_call(void (*func
)(void))
6133 OnigEndCallListItemType
* item
;
6135 item
= (OnigEndCallListItemType
* )xmalloc(sizeof(*item
));
6136 if (item
== 0) return ;
6138 item
->next
= EndCallTop
;
6145 exec_end_call_list(void)
6147 OnigEndCallListItemType
* prev
;
6150 while (EndCallTop
!= 0) {
6151 func
= EndCallTop
->func
;
6155 EndCallTop
= EndCallTop
->next
;
6163 exec_end_call_list();
6165 #ifdef ONIG_DEBUG_STATISTICS
6166 onig_print_statistics(stderr
);
6169 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6170 _CrtDumpMemoryLeaks();
6179 onig_is_in_code_range(const UChar
* p
, OnigCodePoint code
)
6181 OnigCodePoint n
, *data
;
6182 OnigCodePoint low
, high
, x
;
6184 GET_CODE_POINT(n
, p
);
6185 data
= (OnigCodePoint
* )p
;
6188 for (low
= 0, high
= n
; low
< high
; ) {
6189 x
= (low
+ high
) >> 1;
6190 if (code
> data
[x
* 2 + 1])
6196 return ((low
< n
&& code
>= data
[low
* 2]) ? 1 : 0);
6200 onig_is_code_in_cc_len(int elen
, OnigCodePoint code
, CClassNode
* cc
)
6204 if (elen
> 1 || (code
>= SINGLE_BYTE_SIZE
)) {
6205 if (IS_NULL(cc
->mbuf
)) {
6209 found
= (onig_is_in_code_range(cc
->mbuf
->p
, code
) != 0 ? 1 : 0);
6213 found
= (BITSET_AT(cc
->bs
, code
) == 0 ? 0 : 1);
6216 if (IS_NCCLASS_NOT(cc
))
6223 onig_is_code_in_cc(OnigEncoding enc
, OnigCodePoint code
, CClassNode
* cc
)
6227 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
6231 len
= ONIGENC_CODE_TO_MBCLEN(enc
, code
);
6233 return onig_is_code_in_cc_len(len
, code
, cc
);
6239 /* arguments type */
6240 # define ARG_SPECIAL -1
6242 # define ARG_RELADDR 1
6243 # define ARG_ABSADDR 2
6244 # define ARG_LENGTH 3
6245 # define ARG_MEMNUM 4
6246 # define ARG_OPTION 5
6247 # define ARG_STATE_CHECK 6
6249 OnigOpInfoType OnigOpInfo
[] = {
6250 { OP_FINISH
, "finish", ARG_NON
},
6251 { OP_END
, "end", ARG_NON
},
6252 { OP_EXACT1
, "exact1", ARG_SPECIAL
},
6253 { OP_EXACT2
, "exact2", ARG_SPECIAL
},
6254 { OP_EXACT3
, "exact3", ARG_SPECIAL
},
6255 { OP_EXACT4
, "exact4", ARG_SPECIAL
},
6256 { OP_EXACT5
, "exact5", ARG_SPECIAL
},
6257 { OP_EXACTN
, "exactn", ARG_SPECIAL
},
6258 { OP_EXACTMB2N1
, "exactmb2-n1", ARG_SPECIAL
},
6259 { OP_EXACTMB2N2
, "exactmb2-n2", ARG_SPECIAL
},
6260 { OP_EXACTMB2N3
, "exactmb2-n3", ARG_SPECIAL
},
6261 { OP_EXACTMB2N
, "exactmb2-n", ARG_SPECIAL
},
6262 { OP_EXACTMB3N
, "exactmb3n" , ARG_SPECIAL
},
6263 { OP_EXACTMBN
, "exactmbn", ARG_SPECIAL
},
6264 { OP_EXACT1_IC
, "exact1-ic", ARG_SPECIAL
},
6265 { OP_EXACTN_IC
, "exactn-ic", ARG_SPECIAL
},
6266 { OP_CCLASS
, "cclass", ARG_SPECIAL
},
6267 { OP_CCLASS_MB
, "cclass-mb", ARG_SPECIAL
},
6268 { OP_CCLASS_MIX
, "cclass-mix", ARG_SPECIAL
},
6269 { OP_CCLASS_NOT
, "cclass-not", ARG_SPECIAL
},
6270 { OP_CCLASS_MB_NOT
, "cclass-mb-not", ARG_SPECIAL
},
6271 { OP_CCLASS_MIX_NOT
, "cclass-mix-not", ARG_SPECIAL
},
6272 { OP_ANYCHAR
, "anychar", ARG_NON
},
6273 { OP_ANYCHAR_ML
, "anychar-ml", ARG_NON
},
6274 { OP_ANYCHAR_STAR
, "anychar*", ARG_NON
},
6275 { OP_ANYCHAR_ML_STAR
, "anychar-ml*", ARG_NON
},
6276 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next", ARG_SPECIAL
},
6277 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next", ARG_SPECIAL
},
6278 { OP_WORD
, "word", ARG_NON
},
6279 { OP_NOT_WORD
, "not-word", ARG_NON
},
6280 { OP_WORD_BOUND
, "word-bound", ARG_NON
},
6281 { OP_NOT_WORD_BOUND
, "not-word-bound", ARG_NON
},
6282 { OP_WORD_BEGIN
, "word-begin", ARG_NON
},
6283 { OP_WORD_END
, "word-end", ARG_NON
},
6284 { OP_ASCII_WORD
, "ascii-word", ARG_NON
},
6285 { OP_NOT_ASCII_WORD
, "not-ascii-word", ARG_NON
},
6286 { OP_ASCII_WORD_BOUND
, "ascii-word-bound", ARG_NON
},
6287 { OP_NOT_ASCII_WORD_BOUND
,"not-ascii-word-bound", ARG_NON
},
6288 { OP_ASCII_WORD_BEGIN
, "ascii-word-begin", ARG_NON
},
6289 { OP_ASCII_WORD_END
, "ascii-word-end", ARG_NON
},
6290 { OP_BEGIN_BUF
, "begin-buf", ARG_NON
},
6291 { OP_END_BUF
, "end-buf", ARG_NON
},
6292 { OP_BEGIN_LINE
, "begin-line", ARG_NON
},
6293 { OP_END_LINE
, "end-line", ARG_NON
},
6294 { OP_SEMI_END_BUF
, "semi-end-buf", ARG_NON
},
6295 { OP_BEGIN_POSITION
, "begin-position", ARG_NON
},
6296 { OP_BACKREF1
, "backref1", ARG_NON
},
6297 { OP_BACKREF2
, "backref2", ARG_NON
},
6298 { OP_BACKREFN
, "backrefn", ARG_MEMNUM
},
6299 { OP_BACKREFN_IC
, "backrefn-ic", ARG_SPECIAL
},
6300 { OP_BACKREF_MULTI
, "backref_multi", ARG_SPECIAL
},
6301 { OP_BACKREF_MULTI_IC
, "backref_multi-ic", ARG_SPECIAL
},
6302 { OP_BACKREF_WITH_LEVEL
, "backref_at_level", ARG_SPECIAL
},
6303 { OP_MEMORY_START_PUSH
, "mem-start-push", ARG_MEMNUM
},
6304 { OP_MEMORY_START
, "mem-start", ARG_MEMNUM
},
6305 { OP_MEMORY_END_PUSH
, "mem-end-push", ARG_MEMNUM
},
6306 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec", ARG_MEMNUM
},
6307 { OP_MEMORY_END
, "mem-end", ARG_MEMNUM
},
6308 { OP_MEMORY_END_REC
, "mem-end-rec", ARG_MEMNUM
},
6309 { OP_SET_OPTION_PUSH
, "set-option-push", ARG_OPTION
},
6310 { OP_SET_OPTION
, "set-option", ARG_OPTION
},
6311 { OP_KEEP
, "keep", ARG_NON
},
6312 { OP_FAIL
, "fail", ARG_NON
},
6313 { OP_JUMP
, "jump", ARG_RELADDR
},
6314 { OP_PUSH
, "push", ARG_RELADDR
},
6315 { OP_POP
, "pop", ARG_NON
},
6316 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1", ARG_SPECIAL
},
6317 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next", ARG_SPECIAL
},
6318 { OP_REPEAT
, "repeat", ARG_SPECIAL
},
6319 { OP_REPEAT_NG
, "repeat-ng", ARG_SPECIAL
},
6320 { OP_REPEAT_INC
, "repeat-inc", ARG_MEMNUM
},
6321 { OP_REPEAT_INC_NG
, "repeat-inc-ng", ARG_MEMNUM
},
6322 { OP_REPEAT_INC_SG
, "repeat-inc-sg", ARG_MEMNUM
},
6323 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg", ARG_MEMNUM
},
6324 { OP_NULL_CHECK_START
, "null-check-start", ARG_MEMNUM
},
6325 { OP_NULL_CHECK_END
, "null-check-end", ARG_MEMNUM
},
6326 { OP_NULL_CHECK_END_MEMST
,"null-check-end-memst", ARG_MEMNUM
},
6327 { OP_NULL_CHECK_END_MEMST_PUSH
,"null-check-end-memst-push", ARG_MEMNUM
},
6328 { OP_PUSH_POS
, "push-pos", ARG_NON
},
6329 { OP_POP_POS
, "pop-pos", ARG_NON
},
6330 { OP_PUSH_POS_NOT
, "push-pos-not", ARG_RELADDR
},
6331 { OP_FAIL_POS
, "fail-pos", ARG_NON
},
6332 { OP_PUSH_STOP_BT
, "push-stop-bt", ARG_NON
},
6333 { OP_POP_STOP_BT
, "pop-stop-bt", ARG_NON
},
6334 { OP_LOOK_BEHIND
, "look-behind", ARG_SPECIAL
},
6335 { OP_PUSH_LOOK_BEHIND_NOT
, "push-look-behind-not", ARG_SPECIAL
},
6336 { OP_FAIL_LOOK_BEHIND_NOT
, "fail-look-behind-not", ARG_NON
},
6337 { OP_PUSH_ABSENT_POS
, "push-absent-pos", ARG_NON
},
6338 { OP_ABSENT
, "absent", ARG_RELADDR
},
6339 { OP_ABSENT_END
, "absent-end", ARG_NON
},
6340 { OP_CALL
, "call", ARG_ABSADDR
},
6341 { OP_RETURN
, "return", ARG_NON
},
6342 { OP_CONDITION
, "condition", ARG_SPECIAL
},
6343 { OP_STATE_CHECK_PUSH
, "state-check-push", ARG_SPECIAL
},
6344 { OP_STATE_CHECK_PUSH_OR_JUMP
, "state-check-push-or-jump", ARG_SPECIAL
},
6345 { OP_STATE_CHECK
, "state-check", ARG_STATE_CHECK
},
6346 { OP_STATE_CHECK_ANYCHAR_STAR
, "state-check-anychar*", ARG_STATE_CHECK
},
6347 { OP_STATE_CHECK_ANYCHAR_ML_STAR
,
6348 "state-check-anychar-ml*", ARG_STATE_CHECK
},
6357 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
6358 if (opcode
== OnigOpInfo
[i
].opcode
)
6359 return OnigOpInfo
[i
].name
;
6365 op2arg_type(int opcode
)
6369 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
6370 if (opcode
== OnigOpInfo
[i
].opcode
)
6371 return OnigOpInfo
[i
].arg_type
;
6376 # ifdef ONIG_DEBUG_PARSE_TREE
6378 Indent(FILE* f
, int indent
)
6381 for (i
= 0; i
< indent
; i
++) putc(' ', f
);
6383 # endif /* ONIG_DEBUG_PARSE_TREE */
6386 p_string(FILE* f
, ptrdiff_t len
, UChar
* s
)
6389 while (len
-- > 0) { fputc(*s
++, f
); }
6393 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
6395 int x
= len
* mb_len
;
6397 fprintf(f
, ":%d:", len
);
6398 while (x
-- > 0) { fputc(*s
++, f
); }
6402 onig_print_compiled_byte_code(FILE* f
, UChar
* bp
, UChar
* bpend
, UChar
** nextp
,
6409 StateCheckNumType scn
;
6413 fprintf(f
, "[%s", op2name(*bp
));
6414 arg_type
= op2arg_type(*bp
);
6415 if (arg_type
!= ARG_SPECIAL
) {
6421 GET_RELADDR_INC(addr
, bp
);
6422 fprintf(f
, ":(%s%d)", (addr
>= 0) ? "+" : "", addr
);
6425 GET_ABSADDR_INC(addr
, bp
);
6426 fprintf(f
, ":(%d)", addr
);
6429 GET_LENGTH_INC(len
, bp
);
6430 fprintf(f
, ":%d", len
);
6433 mem
= *((MemNumType
* )bp
);
6435 fprintf(f
, ":%d", mem
);
6439 OnigOptionType option
= *((OnigOptionType
* )bp
);
6441 fprintf(f
, ":%d", option
);
6445 case ARG_STATE_CHECK
:
6446 scn
= *((StateCheckNumType
* )bp
);
6447 bp
+= SIZE_STATE_CHECK_NUM
;
6448 fprintf(f
, ":%d", scn
);
6455 case OP_ANYCHAR_STAR_PEEK_NEXT
:
6456 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
6457 p_string(f
, 1, bp
++); break;
6459 p_string(f
, 2, bp
); bp
+= 2; break;
6461 p_string(f
, 3, bp
); bp
+= 3; break;
6463 p_string(f
, 4, bp
); bp
+= 4; break;
6465 p_string(f
, 5, bp
); bp
+= 5; break;
6467 GET_LENGTH_INC(len
, bp
);
6468 p_len_string(f
, len
, 1, bp
);
6473 p_string(f
, 2, bp
); bp
+= 2; break;
6475 p_string(f
, 4, bp
); bp
+= 4; break;
6477 p_string(f
, 6, bp
); bp
+= 6; break;
6479 GET_LENGTH_INC(len
, bp
);
6480 p_len_string(f
, len
, 2, bp
);
6484 GET_LENGTH_INC(len
, bp
);
6485 p_len_string(f
, len
, 3, bp
);
6492 GET_LENGTH_INC(mb_len
, bp
);
6493 GET_LENGTH_INC(len
, bp
);
6494 fprintf(f
, ":%d:%d:", mb_len
, len
);
6496 while (n
-- > 0) { fputc(*bp
++, f
); }
6501 len
= enclen(enc
, bp
, bpend
);
6502 p_string(f
, len
, bp
);
6506 GET_LENGTH_INC(len
, bp
);
6507 p_len_string(f
, len
, 1, bp
);
6512 n
= bitset_on_num((BitSetRef
)bp
);
6514 fprintf(f
, ":%d", n
);
6518 n
= bitset_on_num((BitSetRef
)bp
);
6520 fprintf(f
, ":%d", n
);
6524 case OP_CCLASS_MB_NOT
:
6525 GET_LENGTH_INC(len
, bp
);
6527 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6530 GET_CODE_POINT(code
, q
);
6532 fprintf(f
, ":%d:%d", (int )code
, len
);
6536 case OP_CCLASS_MIX_NOT
:
6537 n
= bitset_on_num((BitSetRef
)bp
);
6539 GET_LENGTH_INC(len
, bp
);
6541 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6544 GET_CODE_POINT(code
, q
);
6546 fprintf(f
, ":%d:%d:%d", n
, (int )code
, len
);
6549 case OP_BACKREFN_IC
:
6550 mem
= *((MemNumType
* )bp
);
6552 fprintf(f
, ":%d", mem
);
6555 case OP_BACKREF_MULTI_IC
:
6556 case OP_BACKREF_MULTI
:
6558 GET_LENGTH_INC(len
, bp
);
6559 for (i
= 0; i
< len
; i
++) {
6560 GET_MEMNUM_INC(mem
, bp
);
6561 if (i
> 0) fputs(", ", f
);
6562 fprintf(f
, "%d", mem
);
6566 case OP_BACKREF_WITH_LEVEL
:
6568 OnigOptionType option
;
6571 GET_OPTION_INC(option
, bp
);
6572 fprintf(f
, ":%d", option
);
6573 GET_LENGTH_INC(level
, bp
);
6574 fprintf(f
, ":%d", level
);
6577 GET_LENGTH_INC(len
, bp
);
6578 for (i
= 0; i
< len
; i
++) {
6579 GET_MEMNUM_INC(mem
, bp
);
6580 if (i
> 0) fputs(", ", f
);
6581 fprintf(f
, "%d", mem
);
6589 mem
= *((MemNumType
* )bp
);
6591 addr
= *((RelAddrType
* )bp
);
6593 fprintf(f
, ":%d:%d", mem
, addr
);
6597 case OP_PUSH_OR_JUMP_EXACT1
:
6598 case OP_PUSH_IF_PEEK_NEXT
:
6599 addr
= *((RelAddrType
* )bp
);
6601 fprintf(f
, ":(%s%d)", (addr
>= 0) ? "+" : "", addr
);
6606 case OP_LOOK_BEHIND
:
6607 GET_LENGTH_INC(len
, bp
);
6608 fprintf(f
, ":%d", len
);
6611 case OP_PUSH_LOOK_BEHIND_NOT
:
6612 GET_RELADDR_INC(addr
, bp
);
6613 GET_LENGTH_INC(len
, bp
);
6614 fprintf(f
, ":%d:(%s%d)", len
, (addr
>= 0) ? "+" : "", addr
);
6617 case OP_STATE_CHECK_PUSH
:
6618 case OP_STATE_CHECK_PUSH_OR_JUMP
:
6619 scn
= *((StateCheckNumType
* )bp
);
6620 bp
+= SIZE_STATE_CHECK_NUM
;
6621 addr
= *((RelAddrType
* )bp
);
6623 fprintf(f
, ":%d:(%s%d)", scn
, (addr
>= 0) ? "+" : "", addr
);
6627 GET_MEMNUM_INC(mem
, bp
);
6628 GET_RELADDR_INC(addr
, bp
);
6629 fprintf(f
, ":%d:(%s%d)", mem
, (addr
>= 0) ? "+" : "", addr
);
6633 fprintf(stderr
, "onig_print_compiled_byte_code: undefined code %d\n",
6638 if (nextp
) *nextp
= bp
;
6641 # ifdef ONIG_DEBUG_COMPILE
6643 print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
6647 UChar
* end
= reg
->p
+ reg
->used
;
6649 fprintf(f
, "code length: %d", reg
->used
);
6655 fprintf(f
, "\n%ld:", bp
- reg
->p
);
6657 fprintf(f
, " %ld:", bp
- reg
->p
);
6658 onig_print_compiled_byte_code(f
, bp
, end
, &bp
, reg
->enc
);
6663 # endif /* ONIG_DEBUG_COMPILE */
6665 # ifdef ONIG_DEBUG_PARSE_TREE
6667 print_indent_tree(FILE* f
, Node
* node
, int indent
)
6669 int i
, type
, container_p
= 0;
6674 if (IS_NULL(node
)) {
6675 fprintf(f
, "ERROR: null node!!!\n");
6683 if (NTYPE(node
) == NT_LIST
)
6684 fprintf(f
, "<list:%"PRIxPTR
">\n", (intptr_t )node
);
6686 fprintf(f
, "<alt:%"PRIxPTR
">\n", (intptr_t )node
);
6688 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6689 while (IS_NOT_NULL(node
= NCDR(node
))) {
6690 if (NTYPE(node
) != type
) {
6691 fprintf(f
, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node
));
6694 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6699 fprintf(f
, "<string%s:%"PRIxPTR
">",
6700 (NSTRING_IS_RAW(node
) ? "-raw" : ""), (intptr_t )node
);
6701 for (p
= NSTR(node
)->s
; p
< NSTR(node
)->end
; p
++) {
6702 if (*p
>= 0x20 && *p
< 0x7f)
6705 fprintf(f
, " 0x%02x", *p
);
6711 fprintf(f
, "<cclass:%"PRIxPTR
">", (intptr_t )node
);
6712 if (IS_NCCLASS_NOT(NCCLASS(node
))) fputs("not ", f
);
6713 if (NCCLASS(node
)->mbuf
) {
6714 BBuf
* bbuf
= NCCLASS(node
)->mbuf
;
6715 OnigCodePoint
* data
= (OnigCodePoint
* )bbuf
->p
;
6716 OnigCodePoint
* end
= (OnigCodePoint
* )(bbuf
->p
+ bbuf
->used
);
6717 fprintf(f
, "%d", *data
++);
6718 for (; data
< end
; data
+=2) {
6720 fprintf(f
, "%04x-%04x", data
[0], data
[1]);
6726 fprintf(f
, "<ctype:%"PRIxPTR
"> ", (intptr_t )node
);
6727 switch (NCTYPE(node
)->ctype
) {
6728 case ONIGENC_CTYPE_WORD
:
6729 if (NCTYPE(node
)->not != 0)
6730 fputs("not word", f
);
6736 fprintf(f
, "ERROR: undefined ctype.\n");
6742 fprintf(f
, "<anychar:%"PRIxPTR
">", (intptr_t )node
);
6746 fprintf(f
, "<anchor:%"PRIxPTR
"> ", (intptr_t )node
);
6747 switch (NANCHOR(node
)->type
) {
6748 case ANCHOR_BEGIN_BUF
: fputs("begin buf", f
); break;
6749 case ANCHOR_END_BUF
: fputs("end buf", f
); break;
6750 case ANCHOR_BEGIN_LINE
: fputs("begin line", f
); break;
6751 case ANCHOR_END_LINE
: fputs("end line", f
); break;
6752 case ANCHOR_SEMI_END_BUF
: fputs("semi end buf", f
); break;
6753 case ANCHOR_BEGIN_POSITION
: fputs("begin position", f
); break;
6755 case ANCHOR_WORD_BOUND
: fputs("word bound", f
); break;
6756 case ANCHOR_NOT_WORD_BOUND
: fputs("not word bound", f
); break;
6757 # ifdef USE_WORD_BEGIN_END
6758 case ANCHOR_WORD_BEGIN
: fputs("word begin", f
); break;
6759 case ANCHOR_WORD_END
: fputs("word end", f
); break;
6761 case ANCHOR_PREC_READ
: fputs("prec read", f
); container_p
= TRUE
; break;
6762 case ANCHOR_PREC_READ_NOT
: fputs("prec read not", f
); container_p
= TRUE
; break;
6763 case ANCHOR_LOOK_BEHIND
: fputs("look_behind", f
); container_p
= TRUE
; break;
6764 case ANCHOR_LOOK_BEHIND_NOT
: fputs("look_behind_not",f
); container_p
= TRUE
; break;
6765 case ANCHOR_KEEP
: fputs("keep",f
); break;
6768 fprintf(f
, "ERROR: undefined anchor type.\n");
6776 BRefNode
* br
= NBREF(node
);
6778 fprintf(f
, "<backref:%"PRIxPTR
">", (intptr_t )node
);
6779 for (i
= 0; i
< br
->back_num
; i
++) {
6780 if (i
> 0) fputs(", ", f
);
6781 fprintf(f
, "%d", p
[i
]);
6786 # ifdef USE_SUBEXP_CALL
6789 CallNode
* cn
= NCALL(node
);
6790 fprintf(f
, "<call:%"PRIxPTR
">", (intptr_t )node
);
6791 p_string(f
, cn
->name_end
- cn
->name
, cn
->name
);
6797 fprintf(f
, "<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node
,
6798 NQTFR(node
)->lower
, NQTFR(node
)->upper
,
6799 (NQTFR(node
)->greedy
? "" : "?"));
6800 print_indent_tree(f
, NQTFR(node
)->target
, indent
+ add
);
6804 fprintf(f
, "<enclose:%"PRIxPTR
"> ", (intptr_t )node
);
6805 switch (NENCLOSE(node
)->type
) {
6806 case ENCLOSE_OPTION
:
6807 fprintf(f
, "option:%d", NENCLOSE(node
)->option
);
6809 case ENCLOSE_MEMORY
:
6810 fprintf(f
, "memory:%d", NENCLOSE(node
)->regnum
);
6812 case ENCLOSE_STOP_BACKTRACK
:
6813 fprintf(f
, "stop-bt");
6815 case ENCLOSE_CONDITION
:
6816 fprintf(f
, "condition:%d", NENCLOSE(node
)->regnum
);
6818 case ENCLOSE_ABSENT
:
6819 fprintf(f
, "absent");
6826 print_indent_tree(f
, NENCLOSE(node
)->target
, indent
+ add
);
6830 fprintf(f
, "print_indent_tree: undefined node type %d\n", NTYPE(node
));
6834 if (type
!= NT_LIST
&& type
!= NT_ALT
&& type
!= NT_QTFR
&&
6838 if (container_p
) print_indent_tree(f
, NANCHOR(node
)->target
, indent
+ add
);
6844 print_tree(FILE* f
, Node
* node
)
6846 print_indent_tree(f
, node
, 0);
6848 # endif /* ONIG_DEBUG_PARSE_TREE */
6849 #endif /* ONIG_DEBUG */