1 /**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 # undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
36 # define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
39 #ifndef USE_TOKEN_THREADED_VM
41 # define USE_TOKEN_THREADED_VM 1
43 # define USE_TOKEN_THREADED_VM 0
48 # define ENC_DUMMY_FLAG (1<<24)
50 rb_enc_asciicompat(OnigEncoding enc
)
52 return ONIGENC_MBC_MINLEN(enc
)==1 && !((enc
)->ruby_encoding_index
& ENC_DUMMY_FLAG
);
54 # undef ONIGENC_IS_MBC_ASCII_WORD
55 # define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
61 #ifdef USE_CRNL_AS_LINE_TERMINATOR
62 # define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
68 is_mbc_newline_ex(OnigEncoding enc
, const UChar
*p
, const UChar
*start
,
69 const UChar
*end
, OnigOptionType option
, int check_prev
)
71 if (IS_NEWLINE_CRLF(option
)) {
72 if (ONIGENC_MBC_TO_CODE(enc
, p
, end
) == 0x0a) {
74 const UChar
*prev
= onigenc_get_prev_char_head(enc
, start
, p
, end
);
75 if ((prev
!= NULL
) && ONIGENC_MBC_TO_CODE(enc
, prev
, end
) == 0x0d)
84 const UChar
*pnext
= p
+ enclen(enc
, p
, end
);
86 ONIGENC_MBC_TO_CODE(enc
, p
, end
) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc
, pnext
, end
) == 0x0a)
89 if (ONIGENC_IS_MBC_NEWLINE(enc
, p
, end
))
95 return ONIGENC_IS_MBC_NEWLINE(enc
, p
, end
);
98 #else /* USE_CRNL_AS_LINE_TERMINATOR */
99 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101 #endif /* USE_CRNL_AS_LINE_TERMINATOR */
103 #ifdef USE_CAPTURE_HISTORY
104 static void history_tree_free(OnigCaptureTreeNode
* node
);
107 history_tree_clear(OnigCaptureTreeNode
* node
)
111 if (IS_NOT_NULL(node
)) {
112 for (i
= 0; i
< node
->num_childs
; i
++) {
113 if (IS_NOT_NULL(node
->childs
[i
])) {
114 history_tree_free(node
->childs
[i
]);
117 for (i
= 0; i
< node
->allocated
; i
++) {
118 node
->childs
[i
] = (OnigCaptureTreeNode
* )0;
120 node
->num_childs
= 0;
121 node
->beg
= ONIG_REGION_NOTPOS
;
122 node
->end
= ONIG_REGION_NOTPOS
;
125 node
->childs
= (OnigCaptureTreeNode
** )0;
130 history_tree_free(OnigCaptureTreeNode
* node
)
132 history_tree_clear(node
);
137 history_root_free(OnigRegion
* r
)
139 if (IS_NOT_NULL(r
->history_root
)) {
140 history_tree_free(r
->history_root
);
141 r
->history_root
= (OnigCaptureTreeNode
* )0;
145 static OnigCaptureTreeNode
*
146 history_node_new(void)
148 OnigCaptureTreeNode
* node
;
150 node
= (OnigCaptureTreeNode
* )xmalloc(sizeof(OnigCaptureTreeNode
));
151 CHECK_NULL_RETURN(node
);
152 node
->childs
= (OnigCaptureTreeNode
** )0;
154 node
->num_childs
= 0;
156 node
->beg
= ONIG_REGION_NOTPOS
;
157 node
->end
= ONIG_REGION_NOTPOS
;
163 history_tree_add_child(OnigCaptureTreeNode
* parent
, OnigCaptureTreeNode
* child
)
165 # define HISTORY_TREE_INIT_ALLOC_SIZE 8
167 if (parent
->num_childs
>= parent
->allocated
) {
170 if (IS_NULL(parent
->childs
)) {
171 n
= HISTORY_TREE_INIT_ALLOC_SIZE
;
173 (OnigCaptureTreeNode
** )xmalloc(sizeof(OnigCaptureTreeNode
*) * n
);
174 CHECK_NULL_RETURN_MEMERR(parent
->childs
);
177 OnigCaptureTreeNode
** tmp
;
178 n
= parent
->allocated
* 2;
180 (OnigCaptureTreeNode
** )xrealloc(parent
->childs
,
181 sizeof(OnigCaptureTreeNode
*) * n
);
183 history_tree_clear(parent
);
184 return ONIGERR_MEMORY
;
186 parent
->childs
= tmp
;
188 for (i
= parent
->allocated
; i
< n
; i
++) {
189 parent
->childs
[i
] = (OnigCaptureTreeNode
* )0;
191 parent
->allocated
= n
;
194 parent
->childs
[parent
->num_childs
] = child
;
195 parent
->num_childs
++;
199 static OnigCaptureTreeNode
*
200 history_tree_clone(OnigCaptureTreeNode
* node
)
203 OnigCaptureTreeNode
*clone
, *child
;
205 clone
= history_node_new();
206 CHECK_NULL_RETURN(clone
);
208 clone
->beg
= node
->beg
;
209 clone
->end
= node
->end
;
210 for (i
= 0; i
< node
->num_childs
; i
++) {
211 child
= history_tree_clone(node
->childs
[i
]);
212 if (IS_NULL(child
)) {
213 history_tree_free(clone
);
214 return (OnigCaptureTreeNode
* )0;
216 r
= history_tree_add_child(clone
, child
);
218 history_tree_free(child
);
219 history_tree_free(clone
);
220 return (OnigCaptureTreeNode
* )0;
227 extern OnigCaptureTreeNode
*
228 onig_get_capture_tree(OnigRegion
* region
)
230 return region
->history_root
;
232 #endif /* USE_CAPTURE_HISTORY */
235 onig_region_clear(OnigRegion
* region
)
239 for (i
= 0; i
< region
->num_regs
; i
++) {
240 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
242 #ifdef USE_CAPTURE_HISTORY
243 history_root_free(region
);
248 onig_region_resize(OnigRegion
* region
, int n
)
250 region
->num_regs
= n
;
252 if (n
< ONIG_NREGION
)
255 if (region
->allocated
== 0) {
256 region
->beg
= (OnigPosition
* )xmalloc(n
* sizeof(OnigPosition
));
257 if (region
->beg
== 0)
258 return ONIGERR_MEMORY
;
260 region
->end
= (OnigPosition
* )xmalloc(n
* sizeof(OnigPosition
));
261 if (region
->end
== 0) {
263 return ONIGERR_MEMORY
;
266 region
->allocated
= n
;
268 else if (region
->allocated
< n
) {
271 region
->allocated
= 0;
272 tmp
= (OnigPosition
* )xrealloc(region
->beg
, n
* sizeof(OnigPosition
));
276 return ONIGERR_MEMORY
;
279 tmp
= (OnigPosition
* )xrealloc(region
->end
, n
* sizeof(OnigPosition
));
283 return ONIGERR_MEMORY
;
287 region
->allocated
= n
;
294 onig_region_resize_clear(OnigRegion
* region
, int n
)
298 r
= onig_region_resize(region
, n
);
299 if (r
!= 0) return r
;
300 onig_region_clear(region
);
305 onig_region_set(OnigRegion
* region
, int at
, int beg
, int end
)
307 if (at
< 0) return ONIGERR_INVALID_ARGUMENT
;
309 if (at
>= region
->allocated
) {
310 int r
= onig_region_resize(region
, at
+ 1);
314 region
->beg
[at
] = beg
;
315 region
->end
[at
] = end
;
320 onig_region_init(OnigRegion
* region
)
322 region
->num_regs
= 0;
323 region
->allocated
= 0;
324 region
->beg
= (OnigPosition
* )0;
325 region
->end
= (OnigPosition
* )0;
326 #ifdef USE_CAPTURE_HISTORY
327 region
->history_root
= (OnigCaptureTreeNode
* )0;
332 onig_region_new(void)
336 r
= (OnigRegion
* )xmalloc(sizeof(OnigRegion
));
343 onig_region_free(OnigRegion
* r
, int free_self
)
346 if (r
->allocated
> 0) {
347 if (r
->beg
) xfree(r
->beg
);
348 if (r
->end
) xfree(r
->end
);
351 #ifdef USE_CAPTURE_HISTORY
352 history_root_free(r
);
354 if (free_self
) xfree(r
);
359 onig_region_copy(OnigRegion
* to
, const OnigRegion
* from
)
361 #define RREGC_SIZE (sizeof(int) * from->num_regs)
364 if (to
== from
) return;
366 r
= onig_region_resize(to
, from
->num_regs
);
369 for (i
= 0; i
< from
->num_regs
; i
++) {
370 to
->beg
[i
] = from
->beg
[i
];
371 to
->end
[i
] = from
->end
[i
];
373 to
->num_regs
= from
->num_regs
;
375 #ifdef USE_CAPTURE_HISTORY
376 history_root_free(to
);
378 if (IS_NOT_NULL(from
->history_root
)) {
379 to
->history_root
= history_tree_clone(from
->history_root
);
386 #define INVALID_STACK_INDEX -1
389 /* used by normal-POP */
390 #define STK_ALT 0x0001
391 #define STK_LOOK_BEHIND_NOT 0x0002
392 #define STK_POS_NOT 0x0003
393 /* handled by normal-POP */
394 #define STK_MEM_START 0x0100
395 #define STK_MEM_END 0x8200
396 #define STK_REPEAT_INC 0x0300
397 #define STK_STATE_CHECK_MARK 0x1000
398 /* avoided by normal-POP */
399 #define STK_NULL_CHECK_START 0x3000
400 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
401 #define STK_MEM_END_MARK 0x8400
402 #define STK_POS 0x0500 /* used when POP-POS */
403 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
404 #define STK_REPEAT 0x0700
405 #define STK_CALL_FRAME 0x0800
406 #define STK_RETURN 0x0900
407 #define STK_VOID 0x0a00 /* for fill a blank */
408 #define STK_ABSENT_POS 0x0b00 /* for absent */
409 #define STK_ABSENT 0x0c00 /* absent inner loop marker */
411 /* stack type check mask */
412 #define STK_MASK_POP_USED 0x00ff
413 #define STK_MASK_TO_VOID_TARGET 0x10ff
414 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
416 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
417 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
418 (msa).stack_p = (void* )0;\
419 (msa).options = (arg_option);\
420 (msa).region = (arg_region);\
421 (msa).start = (arg_start);\
422 (msa).gpos = (arg_gpos);\
423 (msa).best_len = ONIG_MISMATCH;\
426 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
427 (msa).stack_p = (void* )0;\
428 (msa).options = (arg_option);\
429 (msa).region = (arg_region);\
430 (msa).start = (arg_start);\
431 (msa).gpos = (arg_gpos);\
435 #ifdef USE_COMBINATION_EXPLOSION_CHECK
437 # define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
439 # define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
440 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
441 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
442 offset = ((offset) * (state_num)) >> 3;\
443 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
444 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
445 (msa).state_check_buff = (void* )xmalloc(size);\
446 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
449 (msa).state_check_buff = (void* )xalloca(size);\
450 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
451 (size_t )(size - (offset))); \
452 (msa).state_check_buff_size = size;\
455 (msa).state_check_buff = (void* )0;\
456 (msa).state_check_buff_size = 0;\
460 (msa).state_check_buff = (void* )0;\
461 (msa).state_check_buff_size = 0;\
465 # define MATCH_ARG_FREE(msa) do {\
466 if ((msa).stack_p) xfree((msa).stack_p);\
467 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
468 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
471 #else /* USE_COMBINATION_EXPLOSION_CHECK */
472 # define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
473 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
477 #define MAX_PTR_NUM 100
479 #define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
480 if (ptr_num > MAX_PTR_NUM) {\
481 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
482 heap_addr = alloc_addr;\
484 stk_alloc = (OnigStackType* )(msa->stack_p);\
485 stk_base = stk_alloc;\
487 stk_end = stk_base + msa->stack_n;\
489 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
490 stk_base = stk_alloc;\
492 stk_end = stk_base + (stack_num);\
494 } else if (msa->stack_p) {\
495 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
497 stk_alloc = (OnigStackType* )(msa->stack_p);\
498 stk_base = stk_alloc;\
500 stk_end = stk_base + msa->stack_n;\
503 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
504 + sizeof(OnigStackType) * (stack_num));\
506 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
507 stk_base = stk_alloc;\
509 stk_end = stk_base + (stack_num);\
513 #define STACK_SAVE do{\
514 if (stk_base != stk_alloc) {\
515 msa->stack_p = stk_base;\
516 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
520 static unsigned int MatchStackLimitSize
= DEFAULT_MATCH_STACK_LIMIT_SIZE
;
523 onig_get_match_stack_limit_size(void)
525 return MatchStackLimitSize
;
529 onig_set_match_stack_limit_size(unsigned int size
)
531 MatchStackLimitSize
= size
;
536 stack_double(OnigStackType
** arg_stk_base
, OnigStackType
** arg_stk_end
,
537 OnigStackType
** arg_stk
, OnigStackType
* stk_alloc
, OnigMatchArg
* msa
)
540 OnigStackType
*x
, *stk_base
, *stk_end
, *stk
;
542 stk_base
= *arg_stk_base
;
543 stk_end
= *arg_stk_end
;
546 n
= stk_end
- stk_base
;
547 if (stk_base
== stk_alloc
&& IS_NULL(msa
->stack_p
)) {
548 x
= (OnigStackType
* )xmalloc(sizeof(OnigStackType
) * n
* 2);
551 return ONIGERR_MEMORY
;
553 xmemcpy(x
, stk_base
, n
* sizeof(OnigStackType
));
557 unsigned int limit_size
= MatchStackLimitSize
;
559 if (limit_size
!= 0 && n
> limit_size
) {
560 if ((unsigned int )(stk_end
- stk_base
) == limit_size
)
561 return ONIGERR_MATCH_STACK_LIMIT_OVER
;
565 x
= (OnigStackType
* )xrealloc(stk_base
, sizeof(OnigStackType
) * n
);
568 return ONIGERR_MEMORY
;
571 *arg_stk
= x
+ (stk
- stk_base
);
573 *arg_stk_end
= x
+ n
;
577 #define STACK_ENSURE(n) do {\
578 if (stk_end - stk < (n)) {\
579 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
582 if (xmalloc_base) xfree(xmalloc_base);\
588 #define STACK_AT(index) (stk_base + (index))
589 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
591 #define STACK_PUSH_TYPE(stack_type) do {\
593 stk->type = (stack_type);\
597 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
599 #ifdef USE_COMBINATION_EXPLOSION_CHECK
600 # define STATE_CHECK_POS(s,snum) \
601 (((s) - str) * num_comb_exp_check + ((snum) - 1))
602 # define STATE_CHECK_VAL(v,snum) do {\
603 if (state_check_buff != NULL) {\
604 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
605 (v) = state_check_buff[x/8] & (1<<(x%8));\
611 # define ELSE_IF_STATE_CHECK_MARK(stk) \
612 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
613 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
614 state_check_buff[x/8] |= (1<<(x%8)); \
617 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
619 stk->type = (stack_type);\
620 stk->u.state.pcode = (pat);\
621 stk->u.state.pstr = (s);\
622 stk->u.state.pstr_prev = (sprev);\
623 stk->u.state.state_check = 0;\
624 stk->u.state.pkeep = (keep);\
628 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
629 stk->type = (stack_type);\
630 stk->u.state.pcode = (pat);\
631 stk->u.state.state_check = 0;\
635 # define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
637 stk->type = STK_ALT;\
638 stk->u.state.pcode = (pat);\
639 stk->u.state.pstr = (s);\
640 stk->u.state.pstr_prev = (sprev);\
641 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
642 stk->u.state.pkeep = (keep);\
646 # define STACK_PUSH_STATE_CHECK(s,snum) do {\
647 if (state_check_buff != NULL) {\
649 stk->type = STK_STATE_CHECK_MARK;\
650 stk->u.state.pstr = (s);\
651 stk->u.state.state_check = (snum);\
656 #else /* USE_COMBINATION_EXPLOSION_CHECK */
658 # define ELSE_IF_STATE_CHECK_MARK(stk)
660 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
662 stk->type = (stack_type);\
663 stk->u.state.pcode = (pat);\
664 stk->u.state.pstr = (s);\
665 stk->u.state.pstr_prev = (sprev);\
666 stk->u.state.pkeep = (keep);\
670 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
671 stk->type = (stack_type);\
672 stk->u.state.pcode = (pat);\
675 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
677 #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
678 #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
679 #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
680 #define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
681 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
682 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
683 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
685 #define STACK_PUSH_REPEAT(id, pat) do {\
687 stk->type = STK_REPEAT;\
688 stk->u.repeat.num = (id);\
689 stk->u.repeat.pcode = (pat);\
690 stk->u.repeat.count = 0;\
694 #define STACK_PUSH_REPEAT_INC(sindex) do {\
696 stk->type = STK_REPEAT_INC;\
697 stk->u.repeat_inc.si = (sindex);\
701 #define STACK_PUSH_MEM_START(mnum, s) do {\
703 stk->type = STK_MEM_START;\
704 stk->u.mem.num = (mnum);\
705 stk->u.mem.pstr = (s);\
706 stk->u.mem.start = mem_start_stk[mnum];\
707 stk->u.mem.end = mem_end_stk[mnum];\
708 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
709 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
713 #define STACK_PUSH_MEM_END(mnum, s) do {\
715 stk->type = STK_MEM_END;\
716 stk->u.mem.num = (mnum);\
717 stk->u.mem.pstr = (s);\
718 stk->u.mem.start = mem_start_stk[mnum];\
719 stk->u.mem.end = mem_end_stk[mnum];\
720 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
724 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
726 stk->type = STK_MEM_END_MARK;\
727 stk->u.mem.num = (mnum);\
731 #define STACK_GET_MEM_START(mnum, k) do {\
734 while (k > stk_base) {\
736 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
737 && k->u.mem.num == (mnum)) {\
740 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
741 if (level == 0) break;\
747 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
750 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
751 if (level == 0) (start) = k->u.mem.pstr;\
754 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
757 (end) = k->u.mem.pstr;\
765 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
767 stk->type = STK_NULL_CHECK_START;\
768 stk->u.null_check.num = (cnum);\
769 stk->u.null_check.pstr = (s);\
773 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
775 stk->type = STK_NULL_CHECK_END;\
776 stk->u.null_check.num = (cnum);\
780 #define STACK_PUSH_CALL_FRAME(pat) do {\
782 stk->type = STK_CALL_FRAME;\
783 stk->u.call_frame.ret_addr = (pat);\
787 #define STACK_PUSH_RETURN do {\
789 stk->type = STK_RETURN;\
793 #define STACK_PUSH_ABSENT_POS(start, end) do {\
795 stk->type = STK_ABSENT_POS;\
796 stk->u.absent_pos.abs_pstr = (start);\
797 stk->u.absent_pos.end_pstr = (end);\
803 # define STACK_BASE_CHECK(p, at) \
804 if ((p) < stk_base) {\
805 fprintf(stderr, "at %s\n", at);\
809 # define STACK_BASE_CHECK(p, at)
812 #define STACK_POP_ONE do {\
814 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
817 #define STACK_POP do {\
818 switch (pop_level) {\
819 case STACK_POP_LEVEL_FREE:\
822 STACK_BASE_CHECK(stk, "STACK_POP"); \
823 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
824 ELSE_IF_STATE_CHECK_MARK(stk);\
827 case STACK_POP_LEVEL_MEM_START:\
830 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
831 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
832 else if (stk->type == STK_MEM_START) {\
833 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
834 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
836 ELSE_IF_STATE_CHECK_MARK(stk);\
842 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
843 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
844 else if (stk->type == STK_MEM_START) {\
845 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
846 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
848 else if (stk->type == STK_REPEAT_INC) {\
849 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
851 else if (stk->type == STK_MEM_END) {\
852 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
853 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
855 ELSE_IF_STATE_CHECK_MARK(stk);\
861 #define STACK_POP_TIL_POS_NOT do {\
864 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
865 if (stk->type == STK_POS_NOT) break;\
866 else if (stk->type == STK_MEM_START) {\
867 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
868 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
870 else if (stk->type == STK_REPEAT_INC) {\
871 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
873 else if (stk->type == STK_MEM_END) {\
874 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
875 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
877 ELSE_IF_STATE_CHECK_MARK(stk);\
881 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
884 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
885 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
886 else if (stk->type == STK_MEM_START) {\
887 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
888 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
890 else if (stk->type == STK_REPEAT_INC) {\
891 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
893 else if (stk->type == STK_MEM_END) {\
894 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
895 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
897 ELSE_IF_STATE_CHECK_MARK(stk);\
901 #define STACK_POP_TIL_ABSENT do {\
904 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
905 if (stk->type == STK_ABSENT) break;\
906 else if (stk->type == STK_MEM_START) {\
907 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
908 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
910 else if (stk->type == STK_REPEAT_INC) {\
911 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
913 else if (stk->type == STK_MEM_END) {\
914 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
915 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
917 ELSE_IF_STATE_CHECK_MARK(stk);\
921 #define STACK_POP_ABSENT_POS(start, end) do {\
923 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
924 (start) = stk->u.absent_pos.abs_pstr;\
925 (end) = stk->u.absent_pos.end_pstr;\
928 #define STACK_POS_END(k) do {\
932 STACK_BASE_CHECK(k, "STACK_POS_END"); \
933 if (IS_TO_VOID_TARGET(k)) {\
936 else if (k->type == STK_POS) {\
943 #define STACK_STOP_BT_END do {\
944 OnigStackType *k = stk;\
947 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
948 if (IS_TO_VOID_TARGET(k)) {\
951 else if (k->type == STK_STOP_BT) {\
958 #define STACK_NULL_CHECK(isnull,id,s) do {\
959 OnigStackType* k = stk;\
962 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
963 if (k->type == STK_NULL_CHECK_START) {\
964 if (k->u.null_check.num == (id)) {\
965 (isnull) = (k->u.null_check.pstr == (s));\
972 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
974 OnigStackType* k = stk;\
977 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
978 if (k->type == STK_NULL_CHECK_START) {\
979 if (k->u.null_check.num == (id)) {\
981 (isnull) = (k->u.null_check.pstr == (s));\
987 else if (k->type == STK_NULL_CHECK_END) {\
993 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
994 OnigStackType* k = stk;\
997 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
998 if (k->type == STK_NULL_CHECK_START) {\
999 if (k->u.null_check.num == (id)) {\
1000 if (k->u.null_check.pstr != (s)) {\
1008 if (k->type == STK_MEM_START) {\
1009 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1010 (isnull) = 0; break;\
1012 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1013 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1015 endp = (UChar* )k->u.mem.end;\
1016 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1017 (isnull) = 0; break;\
1019 else if (endp != s) {\
1020 (isnull) = -1; /* empty, but position changed */ \
1032 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1034 OnigStackType* k = stk;\
1037 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1038 if (k->type == STK_NULL_CHECK_START) {\
1039 if (k->u.null_check.num == (id)) {\
1041 if (k->u.null_check.pstr != (s)) {\
1049 if (k->type == STK_MEM_START) {\
1050 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1051 (isnull) = 0; break;\
1053 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1054 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1056 endp = (UChar* )k->u.mem.end;\
1057 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1058 (isnull) = 0; break;\
1060 else if (endp != s) {\
1061 (isnull) = -1; /* empty, but position changed */ \
1074 else if (k->type == STK_NULL_CHECK_END) {\
1075 if (k->u.null_check.num == (id)) level++;\
1080 #define STACK_GET_REPEAT(id, k) do {\
1085 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1086 if (k->type == STK_REPEAT) {\
1088 if (k->u.repeat.num == (id)) {\
1093 else if (k->type == STK_CALL_FRAME) level--;\
1094 else if (k->type == STK_RETURN) level++;\
1098 #define STACK_RETURN(addr) do {\
1100 OnigStackType* k = stk;\
1103 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1104 if (k->type == STK_CALL_FRAME) {\
1106 (addr) = k->u.call_frame.ret_addr;\
1111 else if (k->type == STK_RETURN)\
1117 #define STRING_CMP(s1,s2,len) do {\
1118 while (len-- > 0) {\
1119 if (*s1++ != *s2++) goto fail;\
1123 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1124 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1128 static int string_cmp_ic(OnigEncoding enc
, int case_fold_flag
,
1129 UChar
* s1
, UChar
** ps2
, OnigDistance mblen
, const UChar
* text_end
)
1131 UChar buf1
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1132 UChar buf2
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1133 UChar
*p1
, *p2
, *end1
, *s2
;
1139 len1
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s1
, text_end
, buf1
);
1140 len2
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s2
, text_end
, buf2
);
1141 if (len1
!= len2
) return 0;
1144 while (len1
-- > 0) {
1145 if (*p1
!= *p2
) return 0;
1155 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1157 while (len-- > 0) {\
1158 if (*s1++ != *s2++) {\
1159 is_fail = 1; break;\
1164 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1165 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1172 #define IS_EMPTY_STR (str == end)
1173 #define ON_STR_BEGIN(s) ((s) == str)
1174 #define ON_STR_END(s) ((s) == end)
1175 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1176 # define DATA_ENSURE_CHECK1 (s < right_range)
1177 # define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1178 # define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1179 # define ABSENT_END_POS right_range
1181 # define DATA_ENSURE_CHECK1 (s < end)
1182 # define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1183 # define DATA_ENSURE(n) if (s + (n) > end) goto fail
1184 # define ABSENT_END_POS end
1185 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1188 #ifdef USE_CAPTURE_HISTORY
1190 make_capture_history_tree(OnigCaptureTreeNode
* node
, OnigStackType
** kp
,
1191 OnigStackType
* stk_top
, UChar
* str
, regex_t
* reg
)
1194 OnigCaptureTreeNode
* child
;
1195 OnigStackType
* k
= *kp
;
1197 while (k
< stk_top
) {
1198 if (k
->type
== STK_MEM_START
) {
1200 if (n
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
&&
1201 BIT_STATUS_AT(reg
->capture_history
, n
) != 0) {
1202 child
= history_node_new();
1203 CHECK_NULL_RETURN_MEMERR(child
);
1205 child
->beg
= k
->u
.mem
.pstr
- str
;
1206 r
= history_tree_add_child(node
, child
);
1208 history_tree_free(child
);
1212 r
= make_capture_history_tree(child
, kp
, stk_top
, str
, reg
);
1213 if (r
!= 0) return r
;
1216 child
->end
= k
->u
.mem
.pstr
- str
;
1219 else if (k
->type
== STK_MEM_END
) {
1220 if (k
->u
.mem
.num
== node
->group
) {
1221 node
->end
= k
->u
.mem
.pstr
- str
;
1229 return 1; /* 1: root node ending. */
1231 #endif /* USE_CAPTURE_HISTORY */
1233 #ifdef USE_BACKREF_WITH_LEVEL
1234 static int mem_is_in_memp(int mem
, int num
, UChar
* memp
)
1239 for (i
= 0; i
< num
; i
++) {
1240 GET_MEMNUM_INC(m
, memp
);
1241 if (mem
== (int )m
) return 1;
1246 static int backref_match_at_nested_level(regex_t
* reg
,
1247 OnigStackType
* top
, OnigStackType
* stk_base
,
1248 int ignore_case
, int case_fold_flag
,
1249 int nest
, int mem_num
, UChar
* memp
, UChar
** s
, const UChar
* send
)
1251 UChar
*ss
, *p
, *pstart
, *pend
= NULL_UCHARP
;
1258 while (k
>= stk_base
) {
1259 if (k
->type
== STK_CALL_FRAME
) {
1262 else if (k
->type
== STK_RETURN
) {
1265 else if (level
== nest
) {
1266 if (k
->type
== STK_MEM_START
) {
1267 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1268 pstart
= k
->u
.mem
.pstr
;
1269 if (pend
!= NULL_UCHARP
) {
1270 if (pend
- pstart
> send
- *s
) return 0; /* or goto next_mem; */
1274 if (ignore_case
!= 0) {
1275 if (string_cmp_ic(reg
->enc
, case_fold_flag
,
1276 pstart
, &ss
, pend
- pstart
, send
) == 0)
1277 return 0; /* or goto next_mem; */
1281 if (*p
++ != *ss
++) return 0; /* or goto next_mem; */
1290 else if (k
->type
== STK_MEM_END
) {
1291 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1292 pend
= k
->u
.mem
.pstr
;
1301 #endif /* USE_BACKREF_WITH_LEVEL */
1304 #ifdef ONIG_DEBUG_STATISTICS
1307 # include <windows.h>
1308 static LARGE_INTEGER ts
, te
, freq
;
1309 # define GETTIME(t) QueryPerformanceCounter(&(t))
1310 # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1311 * 1000000 / freq.QuadPart)
1314 # define USE_TIMEOFDAY
1316 # ifdef USE_TIMEOFDAY
1317 # ifdef HAVE_SYS_TIME_H
1318 # include <sys/time.h>
1320 # ifdef HAVE_UNISTD_H
1321 # include <unistd.h>
1323 static struct timeval ts
, te
;
1324 # define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1325 # define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1326 (((te).tv_sec - (ts).tv_sec)*1000000))
1327 # else /* USE_TIMEOFDAY */
1328 # ifdef HAVE_SYS_TIMES_H
1329 # include <sys/times.h>
1331 static struct tms ts
, te
;
1332 # define GETTIME(t) times(&(t))
1333 # define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1334 # endif /* USE_TIMEOFDAY */
1336 # endif /* _WIN32 */
1338 static int OpCounter
[256];
1339 static int OpPrevCounter
[256];
1340 static unsigned long OpTime
[256];
1341 static int OpCurr
= OP_FINISH
;
1342 static int OpPrevTarget
= OP_FAIL
;
1343 static int MaxStackDepth
= 0;
1345 # define MOP_IN(opcode) do {\
1346 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1348 OpCounter[opcode]++;\
1352 # define MOP_OUT do {\
1354 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1358 onig_statistics_init(void)
1361 for (i
= 0; i
< 256; i
++) {
1362 OpCounter
[i
] = OpPrevCounter
[i
] = 0; OpTime
[i
] = 0;
1366 QueryPerformanceFrequency(&freq
);
1371 onig_print_statistics(FILE* f
)
1374 fprintf(f
, " count prev time\n");
1375 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
1376 fprintf(f
, "%8d: %8d: %10lu: %s\n",
1377 OpCounter
[i
], OpPrevCounter
[i
], OpTime
[i
], OnigOpInfo
[i
].name
);
1379 fprintf(f
, "\nmax stack depth: %d\n", MaxStackDepth
);
1382 # define STACK_INC do {\
1384 if (stk - stk_base > MaxStackDepth) \
1385 MaxStackDepth = stk - stk_base;\
1388 #else /* ONIG_DEBUG_STATISTICS */
1389 # define STACK_INC stk++
1391 # define MOP_IN(opcode)
1393 #endif /* ONIG_DEBUG_STATISTICS */
1396 #ifdef ONIG_DEBUG_MATCH
1398 stack_type_str(int stack_type
)
1400 switch (stack_type
) {
1401 case STK_ALT
: return "Alt ";
1402 case STK_LOOK_BEHIND_NOT
: return "LBNot ";
1403 case STK_POS_NOT
: return "PosNot";
1404 case STK_MEM_START
: return "MemS ";
1405 case STK_MEM_END
: return "MemE ";
1406 case STK_REPEAT_INC
: return "RepInc";
1407 case STK_STATE_CHECK_MARK
: return "StChMk";
1408 case STK_NULL_CHECK_START
: return "NulChS";
1409 case STK_NULL_CHECK_END
: return "NulChE";
1410 case STK_MEM_END_MARK
: return "MemEMk";
1411 case STK_POS
: return "Pos ";
1412 case STK_STOP_BT
: return "StopBt";
1413 case STK_REPEAT
: return "Rep ";
1414 case STK_CALL_FRAME
: return "Call ";
1415 case STK_RETURN
: return "Ret ";
1416 case STK_VOID
: return "Void ";
1417 case STK_ABSENT_POS
: return "AbsPos";
1418 case STK_ABSENT
: return "Absent";
1419 default: return " ";
1424 /* match data(str - end) from position (sstart). */
1425 /* if sstart == str then set sprev to NULL. */
1427 match_at(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
1428 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1429 const UChar
* right_range
,
1431 const UChar
* sstart
, UChar
* sprev
, OnigMatchArg
* msa
)
1433 static const UChar FinishCode
[] = { OP_FINISH
};
1435 int i
, num_mem
, pop_level
;
1436 ptrdiff_t n
, best_len
;
1437 LengthType tlen
, tlen2
;
1440 OnigOptionType option
= reg
->options
;
1441 OnigEncoding encode
= reg
->enc
;
1442 OnigCaseFoldType case_fold_flag
= reg
->case_fold_flag
;
1443 UChar
*s
, *q
, *sbegin
;
1447 char *xmalloc_base
= NULL
;
1448 OnigStackType
*stk_alloc
, *stk_base
, *stk
, *stk_end
;
1449 OnigStackType
*stkp
; /* used as any purpose. */
1451 OnigStackIndex
*repeat_stk
;
1452 OnigStackIndex
*mem_start_stk
, *mem_end_stk
;
1453 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1455 unsigned char* state_check_buff
= msa
->state_check_buff
;
1456 int num_comb_exp_check
= reg
->num_comb_exp_check
;
1459 #if USE_TOKEN_THREADED_VM
1460 # define OP_OFFSET 1
1461 # define VM_LOOP JUMP;
1462 # define VM_LOOP_END
1463 # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
1464 # define DEFAULT L_DEFAULT:
1465 # define NEXT sprev = sbegin; JUMP
1466 # define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
1468 RB_GNUC_EXTENSION
static const void *oplabels
[] = {
1469 &&L_OP_FINISH
, /* matching process terminator (no more alternative) */
1470 &&L_OP_END
, /* pattern code terminator (success end) */
1472 &&L_OP_EXACT1
, /* single byte, N = 1 */
1473 &&L_OP_EXACT2
, /* single byte, N = 2 */
1474 &&L_OP_EXACT3
, /* single byte, N = 3 */
1475 &&L_OP_EXACT4
, /* single byte, N = 4 */
1476 &&L_OP_EXACT5
, /* single byte, N = 5 */
1477 &&L_OP_EXACTN
, /* single byte */
1478 &&L_OP_EXACTMB2N1
, /* mb-length = 2 N = 1 */
1479 &&L_OP_EXACTMB2N2
, /* mb-length = 2 N = 2 */
1480 &&L_OP_EXACTMB2N3
, /* mb-length = 2 N = 3 */
1481 &&L_OP_EXACTMB2N
, /* mb-length = 2 */
1482 &&L_OP_EXACTMB3N
, /* mb-length = 3 */
1483 &&L_OP_EXACTMBN
, /* other length */
1485 &&L_OP_EXACT1_IC
, /* single byte, N = 1, ignore case */
1486 &&L_OP_EXACTN_IC
, /* single byte, ignore case */
1492 &&L_OP_CCLASS_MB_NOT
,
1493 &&L_OP_CCLASS_MIX_NOT
,
1495 &&L_OP_ANYCHAR
, /* "." */
1496 &&L_OP_ANYCHAR_ML
, /* "." multi-line */
1497 &&L_OP_ANYCHAR_STAR
, /* ".*" */
1498 &&L_OP_ANYCHAR_ML_STAR
, /* ".*" multi-line */
1499 &&L_OP_ANYCHAR_STAR_PEEK_NEXT
,
1500 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT
,
1505 &&L_OP_NOT_WORD_BOUND
,
1506 # ifdef USE_WORD_BEGIN_END
1514 &&L_OP_NOT_ASCII_WORD
,
1515 &&L_OP_ASCII_WORD_BOUND
,
1516 &&L_OP_NOT_ASCII_WORD_BOUND
,
1517 # ifdef USE_WORD_BEGIN_END
1518 &&L_OP_ASCII_WORD_BEGIN
,
1519 &&L_OP_ASCII_WORD_END
,
1529 &&L_OP_SEMI_END_BUF
,
1530 &&L_OP_BEGIN_POSITION
,
1536 &&L_OP_BACKREF_MULTI
,
1537 &&L_OP_BACKREF_MULTI_IC
,
1538 # ifdef USE_BACKREF_WITH_LEVEL
1539 &&L_OP_BACKREF_WITH_LEVEL
, /* \k<xxx+n>, \k<xxx-n> */
1543 &&L_OP_MEMORY_START
,
1544 &&L_OP_MEMORY_START_PUSH
, /* push back-tracker to stack */
1545 &&L_OP_MEMORY_END_PUSH
, /* push back-tracker to stack */
1546 # ifdef USE_SUBEXP_CALL
1547 &&L_OP_MEMORY_END_PUSH_REC
, /* push back-tracker to stack */
1552 # ifdef USE_SUBEXP_CALL
1553 &&L_OP_MEMORY_END_REC
, /* push marker to stack */
1560 &&L_OP_FAIL
, /* pop stack and move */
1564 # ifdef USE_OP_PUSH_OR_JUMP_EXACT
1565 &&L_OP_PUSH_OR_JUMP_EXACT1
, /* if match exact then push, else jump. */
1569 &&L_OP_PUSH_IF_PEEK_NEXT
, /* if match exact then push, else none. */
1570 &&L_OP_REPEAT
, /* {n,m} */
1571 &&L_OP_REPEAT_NG
, /* {n,m}? (non greedy) */
1573 &&L_OP_REPEAT_INC_NG
, /* non greedy */
1574 &&L_OP_REPEAT_INC_SG
, /* search and get in stack */
1575 &&L_OP_REPEAT_INC_NG_SG
, /* search and get in stack (non greedy) */
1576 &&L_OP_NULL_CHECK_START
, /* null loop checker start */
1577 &&L_OP_NULL_CHECK_END
, /* null loop checker end */
1578 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1579 &&L_OP_NULL_CHECK_END_MEMST
, /* null loop checker end (with capture status) */
1583 # ifdef USE_SUBEXP_CALL
1584 &&L_OP_NULL_CHECK_END_MEMST_PUSH
, /* with capture status and push check-end */
1589 &&L_OP_PUSH_POS
, /* (?=...) start */
1590 &&L_OP_POP_POS
, /* (?=...) end */
1591 &&L_OP_PUSH_POS_NOT
, /* (?!...) start */
1592 &&L_OP_FAIL_POS
, /* (?!...) end */
1593 &&L_OP_PUSH_STOP_BT
, /* (?>...) start */
1594 &&L_OP_POP_STOP_BT
, /* (?>...) end */
1595 &&L_OP_LOOK_BEHIND
, /* (?<=...) start (no needs end opcode) */
1596 &&L_OP_PUSH_LOOK_BEHIND_NOT
, /* (?<!...) start */
1597 &&L_OP_FAIL_LOOK_BEHIND_NOT
, /* (?<!...) end */
1598 &&L_OP_PUSH_ABSENT_POS
, /* (?~...) start */
1599 &&L_OP_ABSENT
, /* (?~...) start of inner loop */
1600 &&L_OP_ABSENT_END
, /* (?~...) end */
1602 # ifdef USE_SUBEXP_CALL
1603 &&L_OP_CALL
, /* \g<name> */
1611 # ifdef USE_COMBINATION_EXPLOSION_CHECK
1612 &&L_OP_STATE_CHECK_PUSH
, /* combination explosion check and push */
1613 &&L_OP_STATE_CHECK_PUSH_OR_JUMP
, /* check ok -> push, else jump */
1614 &&L_OP_STATE_CHECK
, /* check only */
1620 # ifdef USE_COMBINATION_EXPLOSION_CHECK
1621 &&L_OP_STATE_CHECK_ANYCHAR_STAR
,
1622 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR
,
1627 /* no need: IS_DYNAMIC_OPTION() == 0 */
1628 # if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
1629 &&L_OP_SET_OPTION_PUSH
, /* set option and push recover option */
1630 &&L_OP_SET_OPTION
/* set option */
1636 #else /* USE_TOKEN_THREADED_VM */
1638 # define OP_OFFSET 0
1644 # define VM_LOOP_END } sprev = sbegin; }
1645 # define CASE(x) case x:
1646 # define DEFAULT default:
1648 # define JUMP continue; break
1649 #endif /* USE_TOKEN_THREADED_VM */
1652 #ifdef USE_SUBEXP_CALL
1653 /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
1654 etc. Additional space is required. */
1655 # define ADD_NUMMEM 1
1657 /* Stack #0 not is used. */
1658 # define ADD_NUMMEM 0
1661 n
= reg
->num_repeat
+ (reg
->num_mem
+ ADD_NUMMEM
) * 2;
1663 STACK_INIT(alloca_base
, xmalloc_base
, n
, INIT_MATCH_STACK_SIZE
);
1664 pop_level
= reg
->stack_pop_level
;
1665 num_mem
= reg
->num_mem
;
1666 repeat_stk
= (OnigStackIndex
* )alloca_base
;
1668 mem_start_stk
= (OnigStackIndex
* )(repeat_stk
+ reg
->num_repeat
);
1669 mem_end_stk
= mem_start_stk
+ (num_mem
+ ADD_NUMMEM
);
1671 OnigStackIndex
*pp
= mem_start_stk
;
1672 for (; pp
< repeat_stk
+ n
; pp
+= 2) {
1673 pp
[0] = INVALID_STACK_INDEX
;
1674 pp
[1] = INVALID_STACK_INDEX
;
1677 #ifndef USE_SUBEXP_CALL
1678 mem_start_stk
--; /* for index start from 1,
1679 mem_start_stk[1]..mem_start_stk[num_mem] */
1680 mem_end_stk
--; /* for index start from 1,
1681 mem_end_stk[1]..mem_end_stk[num_mem] */
1684 #ifdef ONIG_DEBUG_MATCH
1685 fprintf(stderr
, "match_at: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), start: %"PRIuPTR
" (%p), sprev: %"PRIuPTR
" (%p)\n",
1686 (uintptr_t )str
, str
, (uintptr_t )end
, end
, (uintptr_t )sstart
, sstart
, (uintptr_t )sprev
, sprev
);
1687 fprintf(stderr
, "size: %d, start offset: %d\n",
1688 (int )(end
- str
), (int )(sstart
- str
));
1689 fprintf(stderr
, "\n ofs> str stk:type addr:opcode\n");
1692 STACK_PUSH_ENSURED(STK_ALT
, (UChar
* )FinishCode
); /* bottom stack */
1693 best_len
= ONIG_MISMATCH
;
1694 s
= (UChar
* )sstart
;
1695 pkeep
= (UChar
* )sstart
;
1698 #ifdef ONIG_DEBUG_MATCH
1699 # define OPCODE_EXEC_HOOK \
1701 UChar *op, *q, *bp, buf[50]; \
1703 op = p - OP_OFFSET; \
1704 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
1707 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
1708 for (i = 0; i < 7 && q < end; i++) { \
1709 len = enclen(encode, q, end); \
1710 while (len-- > 0) *bp++ = *q++; \
1712 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
1714 xmemcpy(bp, "\"", 1); bp += 1; \
1716 fputs((char* )buf, stderr); \
1717 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
1718 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
1719 stk - stk_base - 1, \
1720 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
1721 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
1722 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
1723 fprintf(stderr, "\n"); \
1726 # define OPCODE_EXEC_HOOK ((void) 0)
1731 CASE(OP_END) MOP_IN(OP_END);
1735 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1736 if (IS_FIND_LONGEST(option)) {
1737 if (n > msa->best_len) {
1739 msa->best_s = (UChar* )sstart;
1746 region = msa->region;
1748 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
1749 region->end[0] = s - str;
1750 for (i = 1; i <= num_mem; i++) {
1751 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1752 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1753 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1755 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1757 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1758 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1759 : (UChar* )((void* )mem_end_stk[i])) - str;
1762 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1766 #ifdef USE_CAPTURE_HISTORY
1767 if (reg->capture_history != 0) {
1769 OnigCaptureTreeNode* node;
1771 if (IS_NULL(region->history_root)) {
1772 region->history_root = node = history_node_new();
1773 CHECK_NULL_RETURN_MEMERR(node);
1776 node = region->history_root;
1777 history_tree_clear(node);
1781 node->beg = ((pkeep > s) ? s : pkeep) - str;
1782 node->end = s - str;
1785 r = make_capture_history_tree(region->history_root, &stkp,
1786 stk, (UChar* )str, reg);
1788 best_len = r; /* error code */
1792 #endif /* USE_CAPTURE_HISTORY */
1794 } /* n > best_len */
1796 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1801 if (IS_FIND_CONDITION(option)) {
1802 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1803 best_len = ONIG_MISMATCH;
1804 goto fail; /* for retry */
1806 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1807 goto fail; /* for retry */
1811 /* default behavior: return first-matching result. */
1815 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
1817 if (*p != *s) goto fail;
1822 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
1825 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1828 len = ONIGENC_MBC_CASE_FOLD(encode,
1829 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1844 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
1846 if (*p != *s) goto fail;
1848 if (*p != *s) goto fail;
1854 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
1856 if (*p != *s) goto fail;
1858 if (*p != *s) goto fail;
1860 if (*p != *s) goto fail;
1866 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
1868 if (*p != *s) goto fail;
1870 if (*p != *s) goto fail;
1872 if (*p != *s) goto fail;
1874 if (*p != *s) goto fail;
1880 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
1882 if (*p != *s) goto fail;
1884 if (*p != *s) goto fail;
1886 if (*p != *s) goto fail;
1888 if (*p != *s) goto fail;
1890 if (*p != *s) goto fail;
1896 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
1897 GET_LENGTH_INC(tlen, p);
1899 while (tlen-- > 0) {
1900 if (*p++ != *s++) goto fail;
1906 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
1909 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1911 GET_LENGTH_INC(tlen, p);
1917 len = ONIGENC_MBC_CASE_FOLD(encode,
1918 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1924 if (*p != *q) goto fail;
1933 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
1935 if (*p != *s) goto fail;
1937 if (*p != *s) goto fail;
1942 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
1944 if (*p != *s) goto fail;
1946 if (*p != *s) goto fail;
1949 if (*p != *s) goto fail;
1951 if (*p != *s) goto fail;
1956 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
1958 if (*p != *s) goto fail;
1960 if (*p != *s) goto fail;
1962 if (*p != *s) goto fail;
1964 if (*p != *s) goto fail;
1967 if (*p != *s) goto fail;
1969 if (*p != *s) goto fail;
1974 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
1975 GET_LENGTH_INC(tlen, p);
1976 DATA_ENSURE(tlen * 2);
1977 while (tlen-- > 0) {
1978 if (*p != *s) goto fail;
1980 if (*p != *s) goto fail;
1987 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
1988 GET_LENGTH_INC(tlen, p);
1989 DATA_ENSURE(tlen * 3);
1990 while (tlen-- > 0) {
1991 if (*p != *s) goto fail;
1993 if (*p != *s) goto fail;
1995 if (*p != *s) goto fail;
2002 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2003 GET_LENGTH_INC(tlen, p); /* mb-len */
2004 GET_LENGTH_INC(tlen2, p); /* string len */
2007 while (tlen2-- > 0) {
2008 if (*p != *s) goto fail;
2015 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2017 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2019 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2023 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2024 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2027 GET_LENGTH_INC(tlen, p);
2034 mb_len = enclen(encode, s, end);
2035 DATA_ENSURE(mb_len);
2038 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2040 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2041 if (! onig_is_in_code_range(p, code)) goto fail;
2045 if (! onig_is_in_code_range(q, code)) goto fail;
2052 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2054 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2059 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2063 GET_LENGTH_INC(tlen, p);
2070 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2072 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2074 s += enclen(encode, s, end);
2078 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2080 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2082 GET_LENGTH_INC(tlen, p);
2084 goto cc_mb_not_success;
2088 GET_LENGTH_INC(tlen, p);
2092 int mb_len = enclen(encode, s, end);
2094 if (! DATA_ENSURE_CHECK(mb_len)) {
2098 goto cc_mb_not_success;
2103 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2105 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2106 if (onig_is_in_code_range(p, code)) goto fail;
2110 if (onig_is_in_code_range(q, code)) goto fail;
2119 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2121 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2126 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2130 GET_LENGTH_INC(tlen, p);
2137 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2139 n = enclen(encode, s, end);
2141 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2146 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2148 n = enclen(encode, s, end);
2154 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2155 while (DATA_ENSURE_CHECK1) {
2156 STACK_PUSH_ALT(p, s, sprev, pkeep);
2157 n = enclen(encode, s, end);
2159 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2166 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2167 while (DATA_ENSURE_CHECK1) {
2168 STACK_PUSH_ALT(p, s, sprev, pkeep);
2169 n = enclen(encode, s, end);
2183 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2184 while (DATA_ENSURE_CHECK1) {
2186 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2188 n = enclen(encode, s, end);
2190 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2198 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2199 while (DATA_ENSURE_CHECK1) {
2201 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2203 n = enclen(encode, s, end);
2218 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2219 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2220 GET_STATE_CHECK_NUM_INC(mem, p);
2221 while (DATA_ENSURE_CHECK1) {
2222 STATE_CHECK_VAL(scv, mem);
2225 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2226 n = enclen(encode, s, end);
2228 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2235 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2236 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2238 GET_STATE_CHECK_NUM_INC(mem, p);
2239 while (DATA_ENSURE_CHECK1) {
2240 STATE_CHECK_VAL(scv, mem);
2243 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2244 n = enclen(encode, s, end);
2257 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2259 CASE(OP_WORD) MOP_IN(OP_WORD);
2261 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2264 s += enclen(encode, s, end);
2268 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2270 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2273 s += enclen(encode, s, end);
2277 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2279 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2282 s += enclen(encode, s, end);
2286 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2288 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2291 s += enclen(encode, s, end);
2295 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2296 if (ON_STR_BEGIN(s)) {
2298 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2301 else if (ON_STR_END(s)) {
2302 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2306 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2307 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2313 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2314 if (ON_STR_BEGIN(s)) {
2316 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2319 else if (ON_STR_END(s)) {
2320 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2324 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2325 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2331 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2332 if (ON_STR_BEGIN(s)) {
2333 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2336 else if (ON_STR_END(s)) {
2337 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2341 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2342 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2348 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
2349 if (ON_STR_BEGIN(s)) {
2350 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2353 else if (ON_STR_END(s)) {
2354 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2358 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2359 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2365 #ifdef USE_WORD_BEGIN_END
2366 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
2367 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2368 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2376 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
2377 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2378 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2386 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
2387 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2388 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2396 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
2397 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2398 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2407 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
2408 if (! ON_STR_BEGIN(s)) goto fail;
2409 if (IS_NOTBOS(msa->options)) goto fail;
2414 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
2415 if (! ON_STR_END(s)) goto fail;
2416 if (IS_NOTEOS(msa->options)) goto fail;
2421 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
2422 if (ON_STR_BEGIN(s)) {
2423 if (IS_NOTBOL(msa->options)) goto fail;
2427 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
2428 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2429 && !(IS_NEWLINE_CRLF(option)
2430 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
2432 && !ON_STR_END(s)) {
2439 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
2440 if (ON_STR_END(s)) {
2441 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2442 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2444 if (IS_NOTEOL(msa->options)) goto fail;
2447 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2451 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2458 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
2459 if (ON_STR_END(s)) {
2460 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2461 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2463 if (IS_NOTEOL(msa->options)) goto fail;
2466 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2470 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2471 UChar* ss = s + enclen(encode, s, end);
2472 if (ON_STR_END(ss)) {
2476 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2477 else if (IS_NEWLINE_CRLF(option)
2478 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2479 ss += enclen(encode, ss, end);
2480 if (ON_STR_END(ss)) {
2490 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
2497 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
2498 GET_MEMNUM_INC(mem, p);
2499 STACK_PUSH_MEM_START(mem, s);
2503 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
2504 GET_MEMNUM_INC(mem, p);
2505 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2506 mem_end_stk[mem] = INVALID_STACK_INDEX;
2510 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
2511 GET_MEMNUM_INC(mem, p);
2512 STACK_PUSH_MEM_END(mem, s);
2516 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
2517 GET_MEMNUM_INC(mem, p);
2518 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2522 CASE(OP_KEEP) MOP_IN(OP_KEEP);
2527 #ifdef USE_SUBEXP_CALL
2528 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
2529 GET_MEMNUM_INC(mem, p);
2530 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2531 STACK_PUSH_MEM_END(mem, s);
2532 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2536 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
2537 GET_MEMNUM_INC(mem, p);
2538 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2539 STACK_GET_MEM_START(mem, stkp);
2541 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2542 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2544 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2546 STACK_PUSH_MEM_END_MARK(mem);
2551 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
2556 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
2561 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
2562 GET_MEMNUM_INC(mem, p);
2566 UChar *pstart, *pend;
2568 /* if you want to remove following line,
2569 you should check in parse and compile time. */
2570 if (mem > num_mem) goto fail;
2571 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2572 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2574 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2575 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2577 pstart = (UChar* )((void* )mem_start_stk[mem]);
2579 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2580 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2581 : (UChar* )((void* )mem_end_stk[mem]));
2585 STRING_CMP(pstart, s, n);
2586 while (sprev + (len = enclen(encode, sprev, end)) < s)
2593 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
2594 GET_MEMNUM_INC(mem, p);
2597 UChar *pstart, *pend;
2599 /* if you want to remove following line,
2600 you should check in parse and compile time. */
2601 if (mem > num_mem) goto fail;
2602 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2603 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2605 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2606 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2608 pstart = (UChar* )((void* )mem_start_stk[mem]);
2610 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2611 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2612 : (UChar* )((void* )mem_end_stk[mem]));
2616 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
2617 while (sprev + (len = enclen(encode, sprev, end)) < s)
2625 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
2628 UChar *pstart, *pend, *swork;
2630 GET_LENGTH_INC(tlen, p);
2631 for (i = 0; i < tlen; i++) {
2632 GET_MEMNUM_INC(mem, p);
2634 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2635 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2637 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2638 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2640 pstart = (UChar* )((void* )mem_start_stk[mem]);
2642 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2643 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2644 : (UChar* )((void* )mem_end_stk[mem]));
2649 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2650 if (is_fail) continue;
2652 while (sprev + (len = enclen(encode, sprev, end)) < s)
2655 p += (SIZE_MEMNUM * (tlen - i - 1));
2656 break; /* success */
2658 if (i == tlen) goto fail;
2664 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
2667 UChar *pstart, *pend, *swork;
2669 GET_LENGTH_INC(tlen, p);
2670 for (i = 0; i < tlen; i++) {
2671 GET_MEMNUM_INC(mem, p);
2673 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2674 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2676 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2677 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2679 pstart = (UChar* )((void* )mem_start_stk[mem]);
2681 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2682 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2683 : (UChar* )((void* )mem_end_stk[mem]));
2688 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
2689 if (is_fail) continue;
2691 while (sprev + (len = enclen(encode, sprev, end)) < s)
2694 p += (SIZE_MEMNUM * (tlen - i - 1));
2695 break; /* success */
2697 if (i == tlen) goto fail;
2702 #ifdef USE_BACKREF_WITH_LEVEL
2703 CASE(OP_BACKREF_WITH_LEVEL)
2709 GET_OPTION_INC(ic, p);
2710 GET_LENGTH_INC(level, p);
2711 GET_LENGTH_INC(tlen, p);
2714 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
2715 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2716 while (sprev + (len = enclen(encode, sprev, end)) < s)
2719 p += (SIZE_MEMNUM * tlen);
2730 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2731 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
2732 GET_OPTION_INC(option, p);
2733 STACK_PUSH_ALT(p, s, sprev, pkeep);
2734 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2738 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
2739 GET_OPTION_INC(option, p);
2744 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
2745 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2746 STACK_PUSH_NULL_CHECK_START(mem, s);
2750 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
2754 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2755 STACK_NULL_CHECK(isnull, mem, s);
2757 #ifdef ONIG_DEBUG_MATCH
2758 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
2759 (int )mem, (uintptr_t )s, s);
2762 /* empty loop founded, skip next instruction */
2769 case OP_REPEAT_INC_NG:
2770 case OP_REPEAT_INC_SG:
2771 case OP_REPEAT_INC_NG_SG:
2775 goto unexpected_bytecode_error;
2783 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2784 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
2788 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2789 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2791 # ifdef ONIG_DEBUG_MATCH
2792 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
2793 (int )mem, (uintptr_t )s, s);
2795 if (isnull == -1) goto fail;
2796 goto null_check_found;
2803 #ifdef USE_SUBEXP_CALL
2804 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
2805 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2809 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2810 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2811 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2813 STACK_NULL_CHECK_REC(isnull, mem, s);
2816 # ifdef ONIG_DEBUG_MATCH
2817 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
2818 (int )mem, (uintptr_t )s, s);
2820 if (isnull == -1) goto fail;
2821 goto null_check_found;
2824 STACK_PUSH_NULL_CHECK_END(mem);
2831 CASE(OP_JUMP) MOP_IN(OP_JUMP);
2832 GET_RELADDR_INC(addr, p);
2835 CHECK_INTERRUPT_IN_MATCH_AT;
2838 CASE(OP_PUSH) MOP_IN(OP_PUSH);
2839 GET_RELADDR_INC(addr, p);
2840 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2844 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2845 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
2846 GET_STATE_CHECK_NUM_INC(mem, p);
2847 STATE_CHECK_VAL(scv, mem);
2850 GET_RELADDR_INC(addr, p);
2851 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2855 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2856 GET_STATE_CHECK_NUM_INC(mem, p);
2857 GET_RELADDR_INC(addr, p);
2858 STATE_CHECK_VAL(scv, mem);
2863 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2868 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
2869 GET_STATE_CHECK_NUM_INC(mem, p);
2870 STATE_CHECK_VAL(scv, mem);
2873 STACK_PUSH_STATE_CHECK(s, mem);
2876 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2878 CASE(OP_POP) MOP_IN(OP_POP);
2883 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2884 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2885 GET_RELADDR_INC(addr, p);
2886 if (*p == *s && DATA_ENSURE_CHECK1) {
2888 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2897 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2898 GET_RELADDR_INC(addr, p);
2901 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2909 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
2911 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2912 GET_RELADDR_INC(addr, p);
2915 repeat_stk[mem] = GET_STACK_INDEX(stk);
2916 STACK_PUSH_REPEAT(mem, p);
2918 if (reg->repeat_range[mem].lower == 0) {
2919 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2925 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
2927 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2928 GET_RELADDR_INC(addr, p);
2931 repeat_stk[mem] = GET_STACK_INDEX(stk);
2932 STACK_PUSH_REPEAT(mem, p);
2934 if (reg->repeat_range[mem].lower == 0) {
2935 STACK_PUSH_ALT(p, s, sprev, pkeep);
2942 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
2943 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2944 si = repeat_stk[mem];
2945 stkp = STACK_AT(si);
2948 stkp->u.repeat.count++;
2949 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2950 /* end of repeat. Nothing to do. */
2952 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2953 STACK_PUSH_ALT(p, s, sprev, pkeep);
2954 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2957 p = stkp->u.repeat.pcode;
2959 STACK_PUSH_REPEAT_INC(si);
2961 CHECK_INTERRUPT_IN_MATCH_AT;
2964 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
2965 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2966 STACK_GET_REPEAT(mem, stkp);
2967 si = GET_STACK_INDEX(stkp);
2971 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
2972 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2973 si = repeat_stk[mem];
2974 stkp = STACK_AT(si);
2977 stkp->u.repeat.count++;
2978 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2979 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2980 UChar* pcode = stkp->u.repeat.pcode;
2982 STACK_PUSH_REPEAT_INC(si);
2983 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
2986 p = stkp->u.repeat.pcode;
2987 STACK_PUSH_REPEAT_INC(si);
2990 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2991 STACK_PUSH_REPEAT_INC(si);
2994 CHECK_INTERRUPT_IN_MATCH_AT;
2997 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
2998 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2999 STACK_GET_REPEAT(mem, stkp);
3000 si = GET_STACK_INDEX(stkp);
3004 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3005 STACK_PUSH_POS(s, sprev, pkeep);
3009 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3011 STACK_POS_END(stkp);
3012 s = stkp->u.state.pstr;
3013 sprev = stkp->u.state.pstr_prev;
3018 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3019 GET_RELADDR_INC(addr, p);
3020 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3024 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3025 STACK_POP_TIL_POS_NOT;
3029 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3034 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3039 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3040 GET_LENGTH_INC(tlen, p);
3041 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3042 if (IS_NULL(s)) goto fail;
3043 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3047 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3048 GET_RELADDR_INC(addr, p);
3049 GET_LENGTH_INC(tlen, p);
3050 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3052 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3053 If you want to change to fail, replace following line. */
3058 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3060 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3065 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3066 STACK_POP_TIL_LOOK_BEHIND_NOT;
3070 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3071 /* Save the absent-start-pos and the original end-pos. */
3072 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3076 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3078 const UChar* aend = ABSENT_END_POS;
3080 UChar* selfp = p - 1;
3082 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3083 GET_RELADDR_INC(addr, p);
3084 #ifdef ONIG_DEBUG_MATCH
3085 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3087 if ((absent > aend) && (s > absent)) {
3088 /* An empty match occurred in (?~...) at the start point.
3093 else if ((s >= aend) && (s > absent)) {
3095 /* Only one (or less) character matched in the last iteration.
3096 * This is not a possible point. */
3099 /* All possible points were found. Try matching after (?~...). */
3104 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3105 n = enclen(encode, s, end);
3106 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3107 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3109 ABSENT_END_POS = aend;
3115 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3116 /* The pattern inside (?~...) was matched.
3117 * Set the end-pos temporary and go to next iteration. */
3118 if (sprev < ABSENT_END_POS)
3119 ABSENT_END_POS = sprev;
3120 #ifdef ONIG_DEBUG_MATCH
3121 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3123 STACK_POP_TIL_ABSENT;
3127 #ifdef USE_SUBEXP_CALL
3128 CASE(OP_CALL) MOP_IN(OP_CALL);
3129 GET_ABSADDR_INC(addr, p);
3130 STACK_PUSH_CALL_FRAME(p);
3135 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3142 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3143 GET_MEMNUM_INC(mem, p);
3144 GET_RELADDR_INC(addr, p);
3145 if ((mem > num_mem) ||
3146 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3147 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3165 p = stk->u.state.pcode;
3166 s = stk->u.state.pstr;
3167 sprev = stk->u.state.pstr_prev;
3168 pkeep = stk->u.state.pkeep;
3170 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3171 if (stk->u.state.state_check != 0) {
3172 stk->type = STK_STATE_CHECK_MARK;
3181 goto bytecode_error;
3186 if (xmalloc_base) xfree(xmalloc_base);
3192 if (xmalloc_base) xfree(xmalloc_base);
3193 return ONIGERR_STACK_BUG;
3198 if (xmalloc_base) xfree(xmalloc_base);
3199 return ONIGERR_UNDEFINED_BYTECODE;
3201 unexpected_bytecode_error:
3203 if (xmalloc_base) xfree(xmalloc_base);
3204 return ONIGERR_UNEXPECTED_BYTECODE;
3209 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3210 const UChar* text, const UChar* text_end, UChar* text_range)
3212 UChar *t, *p, *s, *end;
3214 end = (UChar* )text_end;
3215 end -= target_end - target - 1;
3216 if (end > text_range)
3221 if (enc->max_enc_len == enc->min_enc_len) {
3222 int n = enc->max_enc_len;
3225 if (*s == *target) {
3228 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3233 return (UChar* )NULL;
3236 if (*s == *target) {
3239 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3242 s += enclen(enc, s, text_end);
3245 return (UChar* )NULL;
3249 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
3250 const UChar* t, const UChar* tend,
3251 const UChar* p, const UChar* end)
3254 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3257 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
3259 while (lowlen > 0) {
3260 if (*t++ != *q++) return 0;
3269 slow_search_ic(OnigEncoding enc, int case_fold_flag,
3270 UChar* target, UChar* target_end,
3271 const UChar* text, const UChar* text_end, UChar* text_range)
3275 end = (UChar* )text_end;
3276 end -= target_end - target - 1;
3277 if (end > text_range)
3283 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3287 s += enclen(enc, s, text_end);
3290 return (UChar* )NULL;
3294 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3295 const UChar* text, const UChar* adjust_text,
3296 const UChar* text_end, const UChar* text_start)
3300 s = (UChar* )text_end;
3301 s -= (target_end - target);
3303 s = (UChar* )text_start;
3305 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3308 if (*s == *target) {
3311 while (t < target_end) {
3316 if (t == target_end)
3319 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3322 return (UChar* )NULL;
3326 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
3327 UChar* target, UChar* target_end,
3328 const UChar* text, const UChar* adjust_text,
3329 const UChar* text_end, const UChar* text_start)
3333 s = (UChar* )text_end;
3334 s -= (target_end - target);
3336 s = (UChar* )text_start;
3338 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3341 if (str_lower_case_match(enc, case_fold_flag,
3342 target, target_end, s, text_end))
3345 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3348 return (UChar* )NULL;
3351 #ifndef USE_SUNDAY_QUICK_SEARCH
3352 /* Boyer-Moore-Horspool search applied to a multibyte string */
3354 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3355 const UChar* text, const UChar* text_end,
3356 const UChar* text_range)
3358 const UChar *s, *se, *t, *p, *end;
3360 ptrdiff_t skip, tlen1;
3362 # ifdef ONIG_DEBUG_SEARCH
3363 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3364 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3367 tail = target_end - 1;
3368 tlen1 = tail - target;
3370 if (end + tlen1 > text_end)
3371 end = text_end - tlen1;
3375 if (IS_NULL(reg->int_map)) {
3380 if (t == target) return (UChar* )s;
3383 skip = reg->map[*se];
3386 s += enclen(reg->enc, s, end);
3387 } while ((s - t) < skip && s < end);
3391 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3396 if (t == target) return (UChar* )s;
3399 skip = reg->int_map[*se];
3402 s += enclen(reg->enc, s, end);
3403 } while ((s - t) < skip && s < end);
3408 return (UChar* )NULL;
3411 /* Boyer-Moore-Horspool search */
3413 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3414 const UChar* text, const UChar* text_end, const UChar* text_range)
3416 const UChar *s, *t, *p, *end;
3419 # ifdef ONIG_DEBUG_SEARCH
3420 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3421 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3424 end = text_range + (target_end - target) - 1;
3428 tail = target_end - 1;
3429 s = text + (target_end - target) - 1;
3430 if (IS_NULL(reg->int_map)) {
3434 # ifdef ONIG_DEBUG_SEARCH
3435 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
3436 (intptr_t )(s - text), s);
3439 if (t == target) return (UChar* )p;
3445 else { /* see int_map[] */
3446 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3451 if (t == target) return (UChar* )p;
3454 s += reg->int_map[*s];
3458 return (UChar* )NULL;
3461 /* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
3463 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3464 const UChar* text, const UChar* text_end,
3465 const UChar* text_range)
3467 const UChar *s, *se, *t, *end;
3469 ptrdiff_t skip, tlen1;
3470 OnigEncoding enc = reg->enc;
3471 int case_fold_flag = reg->case_fold_flag;
3473 # ifdef ONIG_DEBUG_SEARCH
3474 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3475 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3478 tail = target_end - 1;
3479 tlen1 = tail - target;
3481 if (end + tlen1 > text_end)
3482 end = text_end - tlen1;
3486 if (IS_NULL(reg->int_map)) {
3489 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3492 skip = reg->map[*se];
3495 s += enclen(reg->enc, s, end);
3496 } while ((s - t) < skip && s < end);
3500 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3503 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3506 skip = reg->int_map[*se];
3509 s += enclen(reg->enc, s, end);
3510 } while ((s - t) < skip && s < end);
3515 return (UChar* )NULL;
3518 /* Boyer-Moore-Horspool search (ignore case) */
3520 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3521 const UChar* text, const UChar* text_end, const UChar* text_range)
3523 const UChar *s, *p, *end;
3525 OnigEncoding enc = reg->enc;
3526 int case_fold_flag = reg->case_fold_flag;
3528 # ifdef ONIG_DEBUG_SEARCH
3529 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3530 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3533 end = text_range + (target_end - target) - 1;
3537 tail = target_end - 1;
3538 s = text + (target_end - target) - 1;
3539 if (IS_NULL(reg->int_map)) {
3541 p = s - (target_end - target) + 1;
3542 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3548 else { /* see int_map[] */
3549 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3551 p = s - (target_end - target) + 1;
3552 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3555 s += reg->int_map[*s];
3559 return (UChar* )NULL;
3562 #else /* USE_SUNDAY_QUICK_SEARCH */
3564 /* Sunday's quick search applied to a multibyte string */
3566 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3567 const UChar* text, const UChar* text_end,
3568 const UChar* text_range)
3570 const UChar *s, *se, *t, *p, *end;
3572 ptrdiff_t skip, tlen1;
3573 OnigEncoding enc = reg->enc;
3575 # ifdef ONIG_DEBUG_SEARCH
3576 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3577 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3580 tail = target_end - 1;
3581 tlen1 = tail - target;
3583 if (end + tlen1 > text_end)
3584 end = text_end - tlen1;
3588 if (IS_NULL(reg->int_map)) {
3593 if (t == target) return (UChar* )s;
3596 if (s + 1 >= end) break;
3597 skip = reg->map[se[1]];
3600 s += enclen(enc, s, end);
3601 } while ((s - t) < skip && s < end);
3605 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3610 if (t == target) return (UChar* )s;
3613 if (s + 1 >= end) break;
3614 skip = reg->int_map[se[1]];
3617 s += enclen(enc, s, end);
3618 } while ((s - t) < skip && s < end);
3623 return (UChar* )NULL;
3626 /* Sunday's quick search */
3628 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3629 const UChar* text, const UChar* text_end, const UChar* text_range)
3631 const UChar *s, *t, *p, *end;
3635 # ifdef ONIG_DEBUG_SEARCH
3636 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3637 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3640 tail = target_end - 1;
3641 tlen1 = tail - target;
3642 end = text_range + tlen1;
3647 if (IS_NULL(reg->int_map)) {
3652 if (t == target) return (UChar* )p;
3655 if (s + 1 >= end) break;
3656 s += reg->map[s[1]];
3659 else { /* see int_map[] */
3660 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3665 if (t == target) return (UChar* )p;
3668 if (s + 1 >= end) break;
3669 s += reg->int_map[s[1]];
3673 return (UChar* )NULL;
3676 /* Sunday's quick search applied to a multibyte string (ignore case) */
3678 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3679 const UChar* text, const UChar* text_end,
3680 const UChar* text_range)
3682 const UChar *s, *se, *t, *end;
3684 ptrdiff_t skip, tlen1;
3685 OnigEncoding enc = reg->enc;
3686 int case_fold_flag = reg->case_fold_flag;
3688 # ifdef ONIG_DEBUG_SEARCH
3689 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3690 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3693 tail = target_end - 1;
3694 tlen1 = tail - target;
3696 if (end + tlen1 > text_end)
3697 end = text_end - tlen1;
3701 if (IS_NULL(reg->int_map)) {
3704 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3707 if (s + 1 >= end) break;
3708 skip = reg->map[se[1]];
3711 s += enclen(enc, s, end);
3712 } while ((s - t) < skip && s < end);
3716 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3719 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3722 if (s + 1 >= end) break;
3723 skip = reg->int_map[se[1]];
3726 s += enclen(enc, s, end);
3727 } while ((s - t) < skip && s < end);
3732 return (UChar* )NULL;
3735 /* Sunday's quick search (ignore case) */
3737 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3738 const UChar* text, const UChar* text_end, const UChar* text_range)
3740 const UChar *s, *p, *end;
3743 OnigEncoding enc = reg->enc;
3744 int case_fold_flag = reg->case_fold_flag;
3746 # ifdef ONIG_DEBUG_SEARCH
3747 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3748 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3751 tail = target_end - 1;
3752 tlen1 = tail - target;
3753 end = text_range + tlen1;
3758 if (IS_NULL(reg->int_map)) {
3761 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3764 if (s + 1 >= end) break;
3765 s += reg->map[s[1]];
3768 else { /* see int_map[] */
3769 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3772 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3775 if (s + 1 >= end) break;
3776 s += reg->int_map[s[1]];
3780 return (UChar* )NULL;
3782 #endif /* USE_SUNDAY_QUICK_SEARCH */
3784 #ifdef USE_INT_MAP_BACKWARD
3786 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3791 if (IS_NULL(*skip)) {
3792 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3793 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3796 len = (int )(end - s);
3797 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3800 for (i = len - 1; i > 0; i--)
3807 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3808 const UChar* text, const UChar* adjust_text,
3809 const UChar* text_end, const UChar* text_start)
3811 const UChar *s, *t, *p;
3813 s = text_end - (target_end - target);
3817 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3822 while (t < target_end && *p == *t) {
3825 if (t == target_end)
3828 s -= reg->int_map_backward[*s];
3829 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3832 return (UChar* )NULL;
3837 map_search(OnigEncoding enc, UChar map[],
3838 const UChar* text, const UChar* text_range, const UChar* text_end)
3840 const UChar *s = text;
3842 while (s < text_range) {
3843 if (map[*s]) return (UChar* )s;
3845 s += enclen(enc, s, text_end);
3847 return (UChar* )NULL;
3851 map_search_backward(OnigEncoding enc, UChar map[],
3852 const UChar* text, const UChar* adjust_text,
3853 const UChar* text_start, const UChar* text_end)
3855 const UChar *s = text_start;
3858 if (map[*s]) return (UChar* )s;
3860 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3862 return (UChar* )NULL;
3866 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3867 OnigOptionType option)
3873 MATCH_ARG_INIT(msa, option, region, at, at);
3874 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3876 ptrdiff_t offset = at - str;
3877 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3882 r = onig_region_resize_clear(region, reg->num_mem + 1);
3888 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
3889 r = match_at(reg, str, end,
3890 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3896 MATCH_ARG_FREE(msa);
3901 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3902 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3904 UChar *p, *pprev = (UChar* )NULL;
3906 #ifdef ONIG_DEBUG_SEARCH
3907 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
3908 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
3912 if (reg->dmin > 0) {
3913 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3917 UChar *q = p + reg->dmin;
3919 if (q >= end) return 0; /* fail */
3920 while (p < q) p += enclen(reg->enc, p, end);
3925 switch (reg->optimize) {
3926 case ONIG_OPTIMIZE_EXACT:
3927 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3929 case ONIG_OPTIMIZE_EXACT_IC:
3930 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3931 reg->exact, reg->exact_end, p, end, range);
3934 case ONIG_OPTIMIZE_EXACT_BM:
3935 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3938 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3939 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3942 case ONIG_OPTIMIZE_EXACT_BM_IC:
3943 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
3946 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
3947 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
3950 case ONIG_OPTIMIZE_MAP:
3951 p = map_search(reg->enc, reg->map, p, range, end);
3955 if (p && p < range) {
3956 if (p - reg->dmin < s) {
3959 p += enclen(reg->enc, p, end);
3963 if (reg->sub_anchor) {
3966 switch (reg->sub_anchor) {
3967 case ANCHOR_BEGIN_LINE:
3968 if (!ON_STR_BEGIN(p)) {
3969 prev = onigenc_get_prev_char_head(reg->enc,
3970 (pprev ? pprev : str), p, end);
3971 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
3976 case ANCHOR_END_LINE:
3977 if (ON_STR_END(p)) {
3978 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3979 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3980 (pprev ? pprev : str), p);
3981 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
3985 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
3991 if (reg->dmax == 0) {
3995 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
3997 *low_prev = onigenc_get_prev_char_head(reg->enc,
3998 (pprev ? pprev : str), p, end);
4002 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4003 if (p < str + reg->dmax) {
4004 *low = (UChar* )str;
4006 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4009 *low = p - reg->dmax;
4011 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4012 *low, end, (const UChar** )low_prev);
4013 if (low_prev && IS_NULL(*low_prev))
4014 *low_prev = onigenc_get_prev_char_head(reg->enc,
4015 (pprev ? pprev : s), *low, end);
4019 *low_prev = onigenc_get_prev_char_head(reg->enc,
4020 (pprev ? pprev : str), *low, end);
4025 /* no needs to adjust *high, *high is used as range check only */
4026 *high = p - reg->dmin;
4028 #ifdef ONIG_DEBUG_SEARCH
4030 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4031 *low - str, *high - str, reg->dmin, reg->dmax);
4033 return 1; /* success */
4036 return 0; /* fail */
4039 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4042 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4043 UChar* s, const UChar* range, UChar* adjrange,
4044 UChar** low, UChar** high)
4052 switch (reg->optimize) {
4053 case ONIG_OPTIMIZE_EXACT:
4055 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4056 range, adjrange, end, p);
4059 case ONIG_OPTIMIZE_EXACT_IC:
4060 case ONIG_OPTIMIZE_EXACT_BM_IC:
4061 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4062 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4063 reg->exact, reg->exact_end,
4064 range, adjrange, end, p);
4067 case ONIG_OPTIMIZE_EXACT_BM:
4068 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4069 #ifdef USE_INT_MAP_BACKWARD
4070 if (IS_NULL(reg->int_map_backward)) {
4072 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4075 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4076 &(reg->int_map_backward));
4079 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4086 case ONIG_OPTIMIZE_MAP:
4087 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4092 if (reg->sub_anchor) {
4095 switch (reg->sub_anchor) {
4096 case ANCHOR_BEGIN_LINE:
4097 if (!ON_STR_BEGIN(p)) {
4098 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4099 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4106 case ANCHOR_END_LINE:
4107 if (ON_STR_END(p)) {
4108 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4109 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4110 if (IS_NULL(prev)) goto fail;
4111 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4117 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4118 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4119 if (IS_NULL(p)) goto fail;
4126 /* no needs to adjust *high, *high is used as range check only */
4127 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4128 *low = p - reg->dmax;
4129 *high = p - reg->dmin;
4130 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4133 #ifdef ONIG_DEBUG_SEARCH
4134 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4135 (int )(*low - str), (int )(*high - str));
4137 return 1; /* success */
4141 #ifdef ONIG_DEBUG_SEARCH
4142 fprintf(stderr, "backward_search_range: fail.\n");
4144 return 0; /* fail */
4149 onig_search(regex_t* reg, const UChar* str, const UChar* end,
4150 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4152 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4156 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4157 const UChar* global_pos,
4158 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4163 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4164 const UChar *orig_start = start;
4165 const UChar *orig_range = range;
4168 #ifdef ONIG_DEBUG_SEARCH
4170 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4171 (uintptr_t )str, str, end - str, start - str, range - str);
4175 r = onig_region_resize_clear(region, reg->num_mem + 1);
4176 if (r) goto finish_no_msa;
4179 if (start > end || start < str) goto mismatch_no_msa;
4182 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4183 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4184 # define MATCH_AND_RETURN_CHECK(upper_range) \
4185 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4186 if (r != ONIG_MISMATCH) {\
4188 if (! IS_FIND_LONGEST(reg->options)) {\
4192 else goto finish; /* error */ \
4195 # define MATCH_AND_RETURN_CHECK(upper_range) \
4196 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4197 if (r != ONIG_MISMATCH) {\
4201 else goto finish; /* error */ \
4203 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4205 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4206 # define MATCH_AND_RETURN_CHECK(none) \
4207 r = match_at(reg, str, end, s, prev, &msa);\
4208 if (r != ONIG_MISMATCH) {\
4210 if (! IS_FIND_LONGEST(reg->options)) {\
4214 else goto finish; /* error */ \
4217 # define MATCH_AND_RETURN_CHECK(none) \
4218 r = match_at(reg, str, end, s, prev, &msa);\
4219 if (r != ONIG_MISMATCH) {\
4223 else goto finish; /* error */ \
4225 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4226 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
4229 /* anchor optimize: resume search range */
4230 if (reg->anchor != 0 && str < end) {
4231 UChar *min_semi_end, *max_semi_end;
4233 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4234 /* search start-position only */
4238 if (global_pos > start)
4240 if (global_pos < range)
4241 range = global_pos + 1;
4249 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4250 /* search str-position only */
4251 if (range > start) {
4252 if (start != str) goto mismatch_no_msa;
4261 goto mismatch_no_msa;
4264 else if (reg->anchor & ANCHOR_END_BUF) {
4265 min_semi_end = max_semi_end = (UChar* )end;
4268 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
4269 goto mismatch_no_msa;
4271 if (range > start) {
4272 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
4273 start = min_semi_end - reg->anchor_dmax;
4275 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
4277 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4278 range = max_semi_end - reg->anchor_dmin + 1;
4281 if (start > range) goto mismatch_no_msa;
4282 /* If start == range, match with empty at end.
4283 Backward search is used. */
4286 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
4287 range = min_semi_end - reg->anchor_dmax;
4289 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
4290 start = max_semi_end - reg->anchor_dmin;
4291 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
4293 if (range > start) goto mismatch_no_msa;
4296 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
4297 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
4299 max_semi_end = (UChar* )end;
4300 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4301 min_semi_end = pre_end;
4303 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4304 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
4305 if (IS_NOT_NULL(pre_end) &&
4306 IS_NEWLINE_CRLF(reg->options) &&
4307 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4308 min_semi_end = pre_end;
4311 if (min_semi_end > str && start <= min_semi_end) {
4316 min_semi_end = (UChar* )end;
4320 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
4321 goto begin_position;
4324 else if (str == end) { /* empty string */
4325 static const UChar address_for_empty_string[] = "";
4327 #ifdef ONIG_DEBUG_SEARCH
4328 fprintf(stderr, "onig_search: empty string.\n");
4331 if (reg->threshold_len == 0) {
4332 start = end = str = address_for_empty_string;
4334 prev = (UChar* )NULL;
4336 MATCH_ARG_INIT(msa, option, region, start, start);
4337 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4338 msa.state_check_buff = (void* )0;
4339 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
4341 MATCH_AND_RETURN_CHECK(end);
4344 goto mismatch_no_msa;
4347 #ifdef ONIG_DEBUG_SEARCH
4348 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4349 (int )(end - str), (int )(start - str), (int )(range - str));
4352 MATCH_ARG_INIT(msa, option, region, start, global_pos);
4353 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4355 ptrdiff_t offset = (MIN(start, range) - str);
4356 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4361 if (range > start) { /* forward search */
4363 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4365 prev = (UChar* )NULL;
4367 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4368 UChar *sch_range, *low, *high, *low_prev;
4370 sch_range = (UChar* )range;
4371 if (reg->dmax != 0) {
4372 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4373 sch_range = (UChar* )end;
4375 sch_range += reg->dmax;
4376 if (sch_range > end) sch_range = (UChar* )end;
4380 if ((end - start) < reg->threshold_len)
4383 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4385 if (! forward_search_range(reg, str, end, s, sch_range,
4386 &low, &high, &low_prev)) goto mismatch;
4392 MATCH_AND_RETURN_CHECK(orig_range);
4394 s += enclen(reg->enc, s, end);
4396 } while (s < range);
4399 else { /* check only. */
4400 if (! forward_search_range(reg, str, end, s, sch_range,
4401 &low, &high, (UChar** )NULL)) goto mismatch;
4403 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
4405 MATCH_AND_RETURN_CHECK(orig_range);
4407 s += enclen(reg->enc, s, end);
4409 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4410 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
4413 s += enclen(reg->enc, s, end);
4416 } while (s < range);
4423 MATCH_AND_RETURN_CHECK(orig_range);
4425 s += enclen(reg->enc, s, end);
4426 } while (s < range);
4428 if (s == range) { /* because empty match with /$/. */
4429 MATCH_AND_RETURN_CHECK(orig_range);
4432 else { /* backward search */
4433 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4434 UChar *low, *high, *adjrange, *sch_start;
4437 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
4439 adjrange = (UChar* )end;
4441 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
4442 (end - range) >= reg->threshold_len) {
4444 sch_start = s + reg->dmax;
4445 if (sch_start > end) sch_start = (UChar* )end;
4446 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4454 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4455 MATCH_AND_RETURN_CHECK(orig_start);
4458 } while (s >= range);
4461 else { /* check only. */
4462 if ((end - range) < reg->threshold_len) goto mismatch;
4465 if (reg->dmax != 0) {
4466 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4467 sch_start = (UChar* )end;
4469 sch_start += reg->dmax;
4470 if (sch_start > end) sch_start = (UChar* )end;
4472 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4473 start, sch_start, end);
4476 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4477 &low, &high) <= 0) goto mismatch;
4482 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4483 MATCH_AND_RETURN_CHECK(orig_start);
4485 } while (s >= range);
4489 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4490 if (IS_FIND_LONGEST(reg->options)) {
4491 if (msa.best_len >= 0) {
4500 MATCH_ARG_FREE(msa);
4502 /* If result is mismatch and no FIND_NOT_EMPTY option,
4503 then the region is not set in match_at(). */
4504 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
4505 onig_region_clear(region);
4509 if (r != ONIG_MISMATCH)
4510 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4518 if (r != ONIG_MISMATCH)
4519 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4524 MATCH_ARG_FREE(msa);
4529 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4530 OnigRegion* region, OnigOptionType option,
4531 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
4542 r = onig_search(reg, str, end, start, end, region, option);
4544 rs = scan_callback(n, r, region, callback_arg);
4549 if (region->end[0] == start - str) {
4550 if (start >= end) break;
4551 start += enclen(reg->enc, start, end);
4554 start = str + region->end[0];
4559 else if (r == ONIG_MISMATCH) {
4571 onig_get_encoding(const regex_t* reg)
4576 extern OnigOptionType
4577 onig_get_options(const regex_t* reg)
4579 return reg->options;
4582 extern OnigCaseFoldType
4583 onig_get_case_fold_flag(const regex_t* reg)
4585 return reg->case_fold_flag;
4588 extern const OnigSyntaxType*
4589 onig_get_syntax(const regex_t* reg)
4595 onig_number_of_captures(const regex_t* reg)
4597 return reg->num_mem;
4601 onig_number_of_capture_histories(const regex_t* reg)
4603 #ifdef USE_CAPTURE_HISTORY
4607 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4608 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
4618 onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)