* 2022-01-18 [ci skip]
[ruby-80x24.org.git] / regexec.c
blob8334b16e96b5f11017d3d74dfd024293f57e91dd
1 /**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
31 #include "regint.h"
33 #ifdef RUBY
34 # undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35 #else
36 # define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37 #endif
39 #ifndef USE_TOKEN_THREADED_VM
40 # ifdef __GNUC__
41 # define USE_TOKEN_THREADED_VM 1
42 # else
43 # define USE_TOKEN_THREADED_VM 0
44 # endif
45 #endif
47 #ifdef RUBY
48 # define ENC_DUMMY_FLAG (1<<24)
49 static inline int
50 rb_enc_asciicompat(OnigEncoding enc)
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
54 # undef ONIGENC_IS_MBC_ASCII_WORD
55 # define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59 #endif /* RUBY */
61 #ifdef USE_CRNL_AS_LINE_TERMINATOR
62 # define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67 static int
68 is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
80 else
81 return 1;
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
98 #else /* USE_CRNL_AS_LINE_TERMINATOR */
99 # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101 #endif /* USE_CRNL_AS_LINE_TERMINATOR */
103 #ifdef USE_CAPTURE_HISTORY
104 static void history_tree_free(OnigCaptureTreeNode* node);
106 static void
107 history_tree_clear(OnigCaptureTreeNode* node)
109 int i;
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
129 static void
130 history_tree_free(OnigCaptureTreeNode* node)
132 history_tree_clear(node);
133 xfree(node);
136 static void
137 history_root_free(OnigRegion* r)
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
145 static OnigCaptureTreeNode*
146 history_node_new(void)
148 OnigCaptureTreeNode* node;
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
159 return node;
162 static int
163 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
165 # define HISTORY_TREE_INIT_ALLOC_SIZE 8
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
186 parent->childs = tmp;
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
191 parent->allocated = n;
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
199 static OnigCaptureTreeNode*
200 history_tree_clone(OnigCaptureTreeNode* node)
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
224 return clone;
227 extern OnigCaptureTreeNode*
228 onig_get_capture_tree(OnigRegion* region)
230 return region->history_root;
232 #endif /* USE_CAPTURE_HISTORY */
234 extern void
235 onig_region_clear(OnigRegion* region)
237 int i;
239 for (i = 0; i < region->num_regs; i++) {
240 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
242 #ifdef USE_CAPTURE_HISTORY
243 history_root_free(region);
244 #endif
247 extern int
248 onig_region_resize(OnigRegion* region, int n)
250 region->num_regs = n;
252 if (n < ONIG_NREGION)
253 n = ONIG_NREGION;
255 if (region->allocated == 0) {
256 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
257 if (region->beg == 0)
258 return ONIGERR_MEMORY;
260 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
261 if (region->end == 0) {
262 xfree(region->beg);
263 return ONIGERR_MEMORY;
266 region->allocated = n;
268 else if (region->allocated < n) {
269 OnigPosition *tmp;
271 region->allocated = 0;
272 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
273 if (tmp == 0) {
274 xfree(region->beg);
275 xfree(region->end);
276 return ONIGERR_MEMORY;
278 region->beg = tmp;
279 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
280 if (tmp == 0) {
281 xfree(region->beg);
282 xfree(region->end);
283 return ONIGERR_MEMORY;
285 region->end = tmp;
287 region->allocated = n;
290 return 0;
293 static int
294 onig_region_resize_clear(OnigRegion* region, int n)
296 int r;
298 r = onig_region_resize(region, n);
299 if (r != 0) return r;
300 onig_region_clear(region);
301 return 0;
304 extern int
305 onig_region_set(OnigRegion* region, int at, int beg, int end)
307 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
309 if (at >= region->allocated) {
310 int r = onig_region_resize(region, at + 1);
311 if (r < 0) return r;
314 region->beg[at] = beg;
315 region->end[at] = end;
316 return 0;
319 extern void
320 onig_region_init(OnigRegion* region)
322 region->num_regs = 0;
323 region->allocated = 0;
324 region->beg = (OnigPosition* )0;
325 region->end = (OnigPosition* )0;
326 #ifdef USE_CAPTURE_HISTORY
327 region->history_root = (OnigCaptureTreeNode* )0;
328 #endif
331 extern OnigRegion*
332 onig_region_new(void)
334 OnigRegion* r;
336 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
337 if (r)
338 onig_region_init(r);
339 return r;
342 extern void
343 onig_region_free(OnigRegion* r, int free_self)
345 if (r) {
346 if (r->allocated > 0) {
347 if (r->beg) xfree(r->beg);
348 if (r->end) xfree(r->end);
349 r->allocated = 0;
351 #ifdef USE_CAPTURE_HISTORY
352 history_root_free(r);
353 #endif
354 if (free_self) xfree(r);
358 extern void
359 onig_region_copy(OnigRegion* to, const OnigRegion* from)
361 #define RREGC_SIZE (sizeof(int) * from->num_regs)
362 int i, r;
364 if (to == from) return;
366 r = onig_region_resize(to, from->num_regs);
367 if (r) return;
369 for (i = 0; i < from->num_regs; i++) {
370 to->beg[i] = from->beg[i];
371 to->end[i] = from->end[i];
373 to->num_regs = from->num_regs;
375 #ifdef USE_CAPTURE_HISTORY
376 history_root_free(to);
378 if (IS_NOT_NULL(from->history_root)) {
379 to->history_root = history_tree_clone(from->history_root);
381 #endif
385 /** stack **/
386 #define INVALID_STACK_INDEX -1
388 /* stack type */
389 /* used by normal-POP */
390 #define STK_ALT 0x0001
391 #define STK_LOOK_BEHIND_NOT 0x0002
392 #define STK_POS_NOT 0x0003
393 /* handled by normal-POP */
394 #define STK_MEM_START 0x0100
395 #define STK_MEM_END 0x8200
396 #define STK_REPEAT_INC 0x0300
397 #define STK_STATE_CHECK_MARK 0x1000
398 /* avoided by normal-POP */
399 #define STK_NULL_CHECK_START 0x3000
400 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
401 #define STK_MEM_END_MARK 0x8400
402 #define STK_POS 0x0500 /* used when POP-POS */
403 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
404 #define STK_REPEAT 0x0700
405 #define STK_CALL_FRAME 0x0800
406 #define STK_RETURN 0x0900
407 #define STK_VOID 0x0a00 /* for fill a blank */
408 #define STK_ABSENT_POS 0x0b00 /* for absent */
409 #define STK_ABSENT 0x0c00 /* absent inner loop marker */
411 /* stack type check mask */
412 #define STK_MASK_POP_USED 0x00ff
413 #define STK_MASK_TO_VOID_TARGET 0x10ff
414 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
416 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
417 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
418 (msa).stack_p = (void* )0;\
419 (msa).options = (arg_option);\
420 (msa).region = (arg_region);\
421 (msa).start = (arg_start);\
422 (msa).gpos = (arg_gpos);\
423 (msa).best_len = ONIG_MISMATCH;\
424 } while(0)
425 #else
426 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
427 (msa).stack_p = (void* )0;\
428 (msa).options = (arg_option);\
429 (msa).region = (arg_region);\
430 (msa).start = (arg_start);\
431 (msa).gpos = (arg_gpos);\
432 } while(0)
433 #endif
435 #ifdef USE_COMBINATION_EXPLOSION_CHECK
437 # define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
439 # define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
440 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
441 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
442 offset = ((offset) * (state_num)) >> 3;\
443 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
444 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
445 (msa).state_check_buff = (void* )xmalloc(size);\
446 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
448 else \
449 (msa).state_check_buff = (void* )xalloca(size);\
450 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
451 (size_t )(size - (offset))); \
452 (msa).state_check_buff_size = size;\
454 else {\
455 (msa).state_check_buff = (void* )0;\
456 (msa).state_check_buff_size = 0;\
459 else {\
460 (msa).state_check_buff = (void* )0;\
461 (msa).state_check_buff_size = 0;\
463 } while(0)
465 # define MATCH_ARG_FREE(msa) do {\
466 if ((msa).stack_p) xfree((msa).stack_p);\
467 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
468 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
470 } while(0)
471 #else /* USE_COMBINATION_EXPLOSION_CHECK */
472 # define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
473 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
477 #define MAX_PTR_NUM 100
479 #define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
480 if (ptr_num > MAX_PTR_NUM) {\
481 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
482 heap_addr = alloc_addr;\
483 if (msa->stack_p) {\
484 stk_alloc = (OnigStackType* )(msa->stack_p);\
485 stk_base = stk_alloc;\
486 stk = stk_base;\
487 stk_end = stk_base + msa->stack_n;\
488 } else {\
489 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
490 stk_base = stk_alloc;\
491 stk = stk_base;\
492 stk_end = stk_base + (stack_num);\
494 } else if (msa->stack_p) {\
495 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
496 heap_addr = NULL;\
497 stk_alloc = (OnigStackType* )(msa->stack_p);\
498 stk_base = stk_alloc;\
499 stk = stk_base;\
500 stk_end = stk_base + msa->stack_n;\
502 else {\
503 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
504 + sizeof(OnigStackType) * (stack_num));\
505 heap_addr = NULL;\
506 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
507 stk_base = stk_alloc;\
508 stk = stk_base;\
509 stk_end = stk_base + (stack_num);\
511 } while(0)
513 #define STACK_SAVE do{\
514 if (stk_base != stk_alloc) {\
515 msa->stack_p = stk_base;\
516 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
518 } while(0)
520 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
522 extern unsigned int
523 onig_get_match_stack_limit_size(void)
525 return MatchStackLimitSize;
528 extern int
529 onig_set_match_stack_limit_size(unsigned int size)
531 MatchStackLimitSize = size;
532 return 0;
535 static int
536 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
537 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
539 size_t n;
540 OnigStackType *x, *stk_base, *stk_end, *stk;
542 stk_base = *arg_stk_base;
543 stk_end = *arg_stk_end;
544 stk = *arg_stk;
546 n = stk_end - stk_base;
547 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
548 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
549 if (IS_NULL(x)) {
550 STACK_SAVE;
551 return ONIGERR_MEMORY;
553 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
554 n *= 2;
556 else {
557 unsigned int limit_size = MatchStackLimitSize;
558 n *= 2;
559 if (limit_size != 0 && n > limit_size) {
560 if ((unsigned int )(stk_end - stk_base) == limit_size)
561 return ONIGERR_MATCH_STACK_LIMIT_OVER;
562 else
563 n = limit_size;
565 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
566 if (IS_NULL(x)) {
567 STACK_SAVE;
568 return ONIGERR_MEMORY;
571 *arg_stk = x + (stk - stk_base);
572 *arg_stk_base = x;
573 *arg_stk_end = x + n;
574 return 0;
577 #define STACK_ENSURE(n) do {\
578 if (stk_end - stk < (n)) {\
579 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
580 if (r != 0) {\
581 STACK_SAVE;\
582 if (xmalloc_base) xfree(xmalloc_base);\
583 return r;\
586 } while(0)
588 #define STACK_AT(index) (stk_base + (index))
589 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
591 #define STACK_PUSH_TYPE(stack_type) do {\
592 STACK_ENSURE(1);\
593 stk->type = (stack_type);\
594 STACK_INC;\
595 } while(0)
597 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
599 #ifdef USE_COMBINATION_EXPLOSION_CHECK
600 # define STATE_CHECK_POS(s,snum) \
601 (((s) - str) * num_comb_exp_check + ((snum) - 1))
602 # define STATE_CHECK_VAL(v,snum) do {\
603 if (state_check_buff != NULL) {\
604 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
605 (v) = state_check_buff[x/8] & (1<<(x%8));\
607 else (v) = 0;\
608 } while(0)
611 # define ELSE_IF_STATE_CHECK_MARK(stk) \
612 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
613 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
614 state_check_buff[x/8] |= (1<<(x%8)); \
617 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
618 STACK_ENSURE(1);\
619 stk->type = (stack_type);\
620 stk->u.state.pcode = (pat);\
621 stk->u.state.pstr = (s);\
622 stk->u.state.pstr_prev = (sprev);\
623 stk->u.state.state_check = 0;\
624 stk->u.state.pkeep = (keep);\
625 STACK_INC;\
626 } while(0)
628 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
629 stk->type = (stack_type);\
630 stk->u.state.pcode = (pat);\
631 stk->u.state.state_check = 0;\
632 STACK_INC;\
633 } while(0)
635 # define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
636 STACK_ENSURE(1);\
637 stk->type = STK_ALT;\
638 stk->u.state.pcode = (pat);\
639 stk->u.state.pstr = (s);\
640 stk->u.state.pstr_prev = (sprev);\
641 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
642 stk->u.state.pkeep = (keep);\
643 STACK_INC;\
644 } while(0)
646 # define STACK_PUSH_STATE_CHECK(s,snum) do {\
647 if (state_check_buff != NULL) {\
648 STACK_ENSURE(1);\
649 stk->type = STK_STATE_CHECK_MARK;\
650 stk->u.state.pstr = (s);\
651 stk->u.state.state_check = (snum);\
652 STACK_INC;\
654 } while(0)
656 #else /* USE_COMBINATION_EXPLOSION_CHECK */
658 # define ELSE_IF_STATE_CHECK_MARK(stk)
660 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
661 STACK_ENSURE(1);\
662 stk->type = (stack_type);\
663 stk->u.state.pcode = (pat);\
664 stk->u.state.pstr = (s);\
665 stk->u.state.pstr_prev = (sprev);\
666 stk->u.state.pkeep = (keep);\
667 STACK_INC;\
668 } while(0)
670 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
671 stk->type = (stack_type);\
672 stk->u.state.pcode = (pat);\
673 STACK_INC;\
674 } while(0)
675 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
677 #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
678 #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
679 #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
680 #define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
681 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
682 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
683 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
685 #define STACK_PUSH_REPEAT(id, pat) do {\
686 STACK_ENSURE(1);\
687 stk->type = STK_REPEAT;\
688 stk->u.repeat.num = (id);\
689 stk->u.repeat.pcode = (pat);\
690 stk->u.repeat.count = 0;\
691 STACK_INC;\
692 } while(0)
694 #define STACK_PUSH_REPEAT_INC(sindex) do {\
695 STACK_ENSURE(1);\
696 stk->type = STK_REPEAT_INC;\
697 stk->u.repeat_inc.si = (sindex);\
698 STACK_INC;\
699 } while(0)
701 #define STACK_PUSH_MEM_START(mnum, s) do {\
702 STACK_ENSURE(1);\
703 stk->type = STK_MEM_START;\
704 stk->u.mem.num = (mnum);\
705 stk->u.mem.pstr = (s);\
706 stk->u.mem.start = mem_start_stk[mnum];\
707 stk->u.mem.end = mem_end_stk[mnum];\
708 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
709 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
710 STACK_INC;\
711 } while(0)
713 #define STACK_PUSH_MEM_END(mnum, s) do {\
714 STACK_ENSURE(1);\
715 stk->type = STK_MEM_END;\
716 stk->u.mem.num = (mnum);\
717 stk->u.mem.pstr = (s);\
718 stk->u.mem.start = mem_start_stk[mnum];\
719 stk->u.mem.end = mem_end_stk[mnum];\
720 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
721 STACK_INC;\
722 } while(0)
724 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
725 STACK_ENSURE(1);\
726 stk->type = STK_MEM_END_MARK;\
727 stk->u.mem.num = (mnum);\
728 STACK_INC;\
729 } while(0)
731 #define STACK_GET_MEM_START(mnum, k) do {\
732 int level = 0;\
733 k = stk;\
734 while (k > stk_base) {\
735 k--;\
736 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
737 && k->u.mem.num == (mnum)) {\
738 level++;\
740 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
741 if (level == 0) break;\
742 level--;\
745 } while(0)
747 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
748 int level = 0;\
749 while (k < stk) {\
750 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
751 if (level == 0) (start) = k->u.mem.pstr;\
752 level++;\
754 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
755 level--;\
756 if (level == 0) {\
757 (end) = k->u.mem.pstr;\
758 break;\
761 k++;\
763 } while(0)
765 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
766 STACK_ENSURE(1);\
767 stk->type = STK_NULL_CHECK_START;\
768 stk->u.null_check.num = (cnum);\
769 stk->u.null_check.pstr = (s);\
770 STACK_INC;\
771 } while(0)
773 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
774 STACK_ENSURE(1);\
775 stk->type = STK_NULL_CHECK_END;\
776 stk->u.null_check.num = (cnum);\
777 STACK_INC;\
778 } while(0)
780 #define STACK_PUSH_CALL_FRAME(pat) do {\
781 STACK_ENSURE(1);\
782 stk->type = STK_CALL_FRAME;\
783 stk->u.call_frame.ret_addr = (pat);\
784 STACK_INC;\
785 } while(0)
787 #define STACK_PUSH_RETURN do {\
788 STACK_ENSURE(1);\
789 stk->type = STK_RETURN;\
790 STACK_INC;\
791 } while(0)
793 #define STACK_PUSH_ABSENT_POS(start, end) do {\
794 STACK_ENSURE(1);\
795 stk->type = STK_ABSENT_POS;\
796 stk->u.absent_pos.abs_pstr = (start);\
797 stk->u.absent_pos.end_pstr = (end);\
798 STACK_INC;\
799 } while(0)
802 #ifdef ONIG_DEBUG
803 # define STACK_BASE_CHECK(p, at) \
804 if ((p) < stk_base) {\
805 fprintf(stderr, "at %s\n", at);\
806 goto stack_error;\
808 #else
809 # define STACK_BASE_CHECK(p, at)
810 #endif
812 #define STACK_POP_ONE do {\
813 stk--;\
814 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
815 } while(0)
817 #define STACK_POP do {\
818 switch (pop_level) {\
819 case STACK_POP_LEVEL_FREE:\
820 while (1) {\
821 stk--;\
822 STACK_BASE_CHECK(stk, "STACK_POP"); \
823 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
824 ELSE_IF_STATE_CHECK_MARK(stk);\
826 break;\
827 case STACK_POP_LEVEL_MEM_START:\
828 while (1) {\
829 stk--;\
830 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
831 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
832 else if (stk->type == STK_MEM_START) {\
833 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
834 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
836 ELSE_IF_STATE_CHECK_MARK(stk);\
838 break;\
839 default:\
840 while (1) {\
841 stk--;\
842 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
843 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
844 else if (stk->type == STK_MEM_START) {\
845 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
846 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
848 else if (stk->type == STK_REPEAT_INC) {\
849 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
851 else if (stk->type == STK_MEM_END) {\
852 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
853 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
855 ELSE_IF_STATE_CHECK_MARK(stk);\
857 break;\
859 } while(0)
861 #define STACK_POP_TIL_POS_NOT do {\
862 while (1) {\
863 stk--;\
864 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
865 if (stk->type == STK_POS_NOT) break;\
866 else if (stk->type == STK_MEM_START) {\
867 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
868 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
870 else if (stk->type == STK_REPEAT_INC) {\
871 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
873 else if (stk->type == STK_MEM_END) {\
874 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
875 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
877 ELSE_IF_STATE_CHECK_MARK(stk);\
879 } while(0)
881 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
882 while (1) {\
883 stk--;\
884 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
885 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
886 else if (stk->type == STK_MEM_START) {\
887 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
888 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
890 else if (stk->type == STK_REPEAT_INC) {\
891 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
893 else if (stk->type == STK_MEM_END) {\
894 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
895 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
897 ELSE_IF_STATE_CHECK_MARK(stk);\
899 } while(0)
901 #define STACK_POP_TIL_ABSENT do {\
902 while (1) {\
903 stk--;\
904 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
905 if (stk->type == STK_ABSENT) break;\
906 else if (stk->type == STK_MEM_START) {\
907 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
908 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
910 else if (stk->type == STK_REPEAT_INC) {\
911 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
913 else if (stk->type == STK_MEM_END) {\
914 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
915 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
917 ELSE_IF_STATE_CHECK_MARK(stk);\
919 } while(0)
921 #define STACK_POP_ABSENT_POS(start, end) do {\
922 stk--;\
923 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
924 (start) = stk->u.absent_pos.abs_pstr;\
925 (end) = stk->u.absent_pos.end_pstr;\
926 } while(0)
928 #define STACK_POS_END(k) do {\
929 k = stk;\
930 while (1) {\
931 k--;\
932 STACK_BASE_CHECK(k, "STACK_POS_END"); \
933 if (IS_TO_VOID_TARGET(k)) {\
934 k->type = STK_VOID;\
936 else if (k->type == STK_POS) {\
937 k->type = STK_VOID;\
938 break;\
941 } while(0)
943 #define STACK_STOP_BT_END do {\
944 OnigStackType *k = stk;\
945 while (1) {\
946 k--;\
947 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
948 if (IS_TO_VOID_TARGET(k)) {\
949 k->type = STK_VOID;\
951 else if (k->type == STK_STOP_BT) {\
952 k->type = STK_VOID;\
953 break;\
956 } while(0)
958 #define STACK_NULL_CHECK(isnull,id,s) do {\
959 OnigStackType* k = stk;\
960 while (1) {\
961 k--;\
962 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
963 if (k->type == STK_NULL_CHECK_START) {\
964 if (k->u.null_check.num == (id)) {\
965 (isnull) = (k->u.null_check.pstr == (s));\
966 break;\
970 } while(0)
972 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
973 int level = 0;\
974 OnigStackType* k = stk;\
975 while (1) {\
976 k--;\
977 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
978 if (k->type == STK_NULL_CHECK_START) {\
979 if (k->u.null_check.num == (id)) {\
980 if (level == 0) {\
981 (isnull) = (k->u.null_check.pstr == (s));\
982 break;\
984 else level--;\
987 else if (k->type == STK_NULL_CHECK_END) {\
988 level++;\
991 } while(0)
993 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
994 OnigStackType* k = stk;\
995 while (1) {\
996 k--;\
997 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
998 if (k->type == STK_NULL_CHECK_START) {\
999 if (k->u.null_check.num == (id)) {\
1000 if (k->u.null_check.pstr != (s)) {\
1001 (isnull) = 0;\
1002 break;\
1004 else {\
1005 UChar* endp;\
1006 (isnull) = 1;\
1007 while (k < stk) {\
1008 if (k->type == STK_MEM_START) {\
1009 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1010 (isnull) = 0; break;\
1012 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1013 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1014 else\
1015 endp = (UChar* )k->u.mem.end;\
1016 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1017 (isnull) = 0; break;\
1019 else if (endp != s) {\
1020 (isnull) = -1; /* empty, but position changed */ \
1023 k++;\
1025 break;\
1030 } while(0)
1032 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1033 int level = 0;\
1034 OnigStackType* k = stk;\
1035 while (1) {\
1036 k--;\
1037 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1038 if (k->type == STK_NULL_CHECK_START) {\
1039 if (k->u.null_check.num == (id)) {\
1040 if (level == 0) {\
1041 if (k->u.null_check.pstr != (s)) {\
1042 (isnull) = 0;\
1043 break;\
1045 else {\
1046 UChar* endp;\
1047 (isnull) = 1;\
1048 while (k < stk) {\
1049 if (k->type == STK_MEM_START) {\
1050 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1051 (isnull) = 0; break;\
1053 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1054 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1055 else\
1056 endp = (UChar* )k->u.mem.end;\
1057 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1058 (isnull) = 0; break;\
1060 else if (endp != s) {\
1061 (isnull) = -1; /* empty, but position changed */ \
1064 k++;\
1066 break;\
1069 else {\
1070 level--;\
1074 else if (k->type == STK_NULL_CHECK_END) {\
1075 if (k->u.null_check.num == (id)) level++;\
1078 } while(0)
1080 #define STACK_GET_REPEAT(id, k) do {\
1081 int level = 0;\
1082 k = stk;\
1083 while (1) {\
1084 k--;\
1085 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1086 if (k->type == STK_REPEAT) {\
1087 if (level == 0) {\
1088 if (k->u.repeat.num == (id)) {\
1089 break;\
1093 else if (k->type == STK_CALL_FRAME) level--;\
1094 else if (k->type == STK_RETURN) level++;\
1096 } while(0)
1098 #define STACK_RETURN(addr) do {\
1099 int level = 0;\
1100 OnigStackType* k = stk;\
1101 while (1) {\
1102 k--;\
1103 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1104 if (k->type == STK_CALL_FRAME) {\
1105 if (level == 0) {\
1106 (addr) = k->u.call_frame.ret_addr;\
1107 break;\
1109 else level--;\
1111 else if (k->type == STK_RETURN)\
1112 level++;\
1114 } while(0)
1117 #define STRING_CMP(s1,s2,len) do {\
1118 while (len-- > 0) {\
1119 if (*s1++ != *s2++) goto fail;\
1121 } while(0)
1123 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1124 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1125 goto fail; \
1126 } while(0)
1128 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1129 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1131 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1132 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1133 UChar *p1, *p2, *end1, *s2;
1134 int len1, len2;
1136 s2 = *ps2;
1137 end1 = s1 + mblen;
1138 while (s1 < end1) {
1139 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1140 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1141 if (len1 != len2) return 0;
1142 p1 = buf1;
1143 p2 = buf2;
1144 while (len1-- > 0) {
1145 if (*p1 != *p2) return 0;
1146 p1++;
1147 p2++;
1151 *ps2 = s2;
1152 return 1;
1155 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1156 is_fail = 0;\
1157 while (len-- > 0) {\
1158 if (*s1++ != *s2++) {\
1159 is_fail = 1; break;\
1162 } while(0)
1164 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1165 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1166 is_fail = 1; \
1167 else \
1168 is_fail = 0; \
1169 } while(0)
1172 #define IS_EMPTY_STR (str == end)
1173 #define ON_STR_BEGIN(s) ((s) == str)
1174 #define ON_STR_END(s) ((s) == end)
1175 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1176 # define DATA_ENSURE_CHECK1 (s < right_range)
1177 # define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1178 # define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1179 # define ABSENT_END_POS right_range
1180 #else
1181 # define DATA_ENSURE_CHECK1 (s < end)
1182 # define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1183 # define DATA_ENSURE(n) if (s + (n) > end) goto fail
1184 # define ABSENT_END_POS end
1185 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1188 #ifdef USE_CAPTURE_HISTORY
1189 static int
1190 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1191 OnigStackType* stk_top, UChar* str, regex_t* reg)
1193 int n, r;
1194 OnigCaptureTreeNode* child;
1195 OnigStackType* k = *kp;
1197 while (k < stk_top) {
1198 if (k->type == STK_MEM_START) {
1199 n = k->u.mem.num;
1200 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1201 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1202 child = history_node_new();
1203 CHECK_NULL_RETURN_MEMERR(child);
1204 child->group = n;
1205 child->beg = k->u.mem.pstr - str;
1206 r = history_tree_add_child(node, child);
1207 if (r != 0) {
1208 history_tree_free(child);
1209 return r;
1211 *kp = (k + 1);
1212 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1213 if (r != 0) return r;
1215 k = *kp;
1216 child->end = k->u.mem.pstr - str;
1219 else if (k->type == STK_MEM_END) {
1220 if (k->u.mem.num == node->group) {
1221 node->end = k->u.mem.pstr - str;
1222 *kp = k;
1223 return 0;
1226 k++;
1229 return 1; /* 1: root node ending. */
1231 #endif /* USE_CAPTURE_HISTORY */
1233 #ifdef USE_BACKREF_WITH_LEVEL
1234 static int mem_is_in_memp(int mem, int num, UChar* memp)
1236 int i;
1237 MemNumType m;
1239 for (i = 0; i < num; i++) {
1240 GET_MEMNUM_INC(m, memp);
1241 if (mem == (int )m) return 1;
1243 return 0;
1246 static int backref_match_at_nested_level(regex_t* reg,
1247 OnigStackType* top, OnigStackType* stk_base,
1248 int ignore_case, int case_fold_flag,
1249 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1251 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1252 int level;
1253 OnigStackType* k;
1255 level = 0;
1256 k = top;
1257 k--;
1258 while (k >= stk_base) {
1259 if (k->type == STK_CALL_FRAME) {
1260 level--;
1262 else if (k->type == STK_RETURN) {
1263 level++;
1265 else if (level == nest) {
1266 if (k->type == STK_MEM_START) {
1267 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1268 pstart = k->u.mem.pstr;
1269 if (pend != NULL_UCHARP) {
1270 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1271 p = pstart;
1272 ss = *s;
1274 if (ignore_case != 0) {
1275 if (string_cmp_ic(reg->enc, case_fold_flag,
1276 pstart, &ss, pend - pstart, send) == 0)
1277 return 0; /* or goto next_mem; */
1279 else {
1280 while (p < pend) {
1281 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1285 *s = ss;
1286 return 1;
1290 else if (k->type == STK_MEM_END) {
1291 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1292 pend = k->u.mem.pstr;
1296 k--;
1299 return 0;
1301 #endif /* USE_BACKREF_WITH_LEVEL */
1304 #ifdef ONIG_DEBUG_STATISTICS
1306 # ifdef _WIN32
1307 # include <windows.h>
1308 static LARGE_INTEGER ts, te, freq;
1309 # define GETTIME(t) QueryPerformanceCounter(&(t))
1310 # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1311 * 1000000 / freq.QuadPart)
1312 # else /* _WIN32 */
1314 # define USE_TIMEOFDAY
1316 # ifdef USE_TIMEOFDAY
1317 # ifdef HAVE_SYS_TIME_H
1318 # include <sys/time.h>
1319 # endif
1320 # ifdef HAVE_UNISTD_H
1321 # include <unistd.h>
1322 # endif
1323 static struct timeval ts, te;
1324 # define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1325 # define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1326 (((te).tv_sec - (ts).tv_sec)*1000000))
1327 # else /* USE_TIMEOFDAY */
1328 # ifdef HAVE_SYS_TIMES_H
1329 # include <sys/times.h>
1330 # endif
1331 static struct tms ts, te;
1332 # define GETTIME(t) times(&(t))
1333 # define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1334 # endif /* USE_TIMEOFDAY */
1336 # endif /* _WIN32 */
1338 static int OpCounter[256];
1339 static int OpPrevCounter[256];
1340 static unsigned long OpTime[256];
1341 static int OpCurr = OP_FINISH;
1342 static int OpPrevTarget = OP_FAIL;
1343 static int MaxStackDepth = 0;
1345 # define MOP_IN(opcode) do {\
1346 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1347 OpCurr = opcode;\
1348 OpCounter[opcode]++;\
1349 GETTIME(ts);\
1350 } while(0)
1352 # define MOP_OUT do {\
1353 GETTIME(te);\
1354 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1355 } while(0)
1357 extern void
1358 onig_statistics_init(void)
1360 int i;
1361 for (i = 0; i < 256; i++) {
1362 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1364 MaxStackDepth = 0;
1365 # ifdef _WIN32
1366 QueryPerformanceFrequency(&freq);
1367 # endif
1370 extern void
1371 onig_print_statistics(FILE* f)
1373 int i;
1374 fprintf(f, " count prev time\n");
1375 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1376 fprintf(f, "%8d: %8d: %10lu: %s\n",
1377 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1379 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1382 # define STACK_INC do {\
1383 stk++;\
1384 if (stk - stk_base > MaxStackDepth) \
1385 MaxStackDepth = stk - stk_base;\
1386 } while(0)
1388 #else /* ONIG_DEBUG_STATISTICS */
1389 # define STACK_INC stk++
1391 # define MOP_IN(opcode)
1392 # define MOP_OUT
1393 #endif /* ONIG_DEBUG_STATISTICS */
1396 #ifdef ONIG_DEBUG_MATCH
1397 static char *
1398 stack_type_str(int stack_type)
1400 switch (stack_type) {
1401 case STK_ALT: return "Alt ";
1402 case STK_LOOK_BEHIND_NOT: return "LBNot ";
1403 case STK_POS_NOT: return "PosNot";
1404 case STK_MEM_START: return "MemS ";
1405 case STK_MEM_END: return "MemE ";
1406 case STK_REPEAT_INC: return "RepInc";
1407 case STK_STATE_CHECK_MARK: return "StChMk";
1408 case STK_NULL_CHECK_START: return "NulChS";
1409 case STK_NULL_CHECK_END: return "NulChE";
1410 case STK_MEM_END_MARK: return "MemEMk";
1411 case STK_POS: return "Pos ";
1412 case STK_STOP_BT: return "StopBt";
1413 case STK_REPEAT: return "Rep ";
1414 case STK_CALL_FRAME: return "Call ";
1415 case STK_RETURN: return "Ret ";
1416 case STK_VOID: return "Void ";
1417 case STK_ABSENT_POS: return "AbsPos";
1418 case STK_ABSENT: return "Absent";
1419 default: return " ";
1422 #endif
1424 /* match data(str - end) from position (sstart). */
1425 /* if sstart == str then set sprev to NULL. */
1426 static OnigPosition
1427 match_at(regex_t* reg, const UChar* str, const UChar* end,
1428 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1429 const UChar* right_range,
1430 #endif
1431 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1433 static const UChar FinishCode[] = { OP_FINISH };
1435 int i, num_mem, pop_level;
1436 ptrdiff_t n, best_len;
1437 LengthType tlen, tlen2;
1438 MemNumType mem;
1439 RelAddrType addr;
1440 OnigOptionType option = reg->options;
1441 OnigEncoding encode = reg->enc;
1442 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1443 UChar *s, *q, *sbegin;
1444 UChar *p = reg->p;
1445 UChar *pkeep;
1446 char *alloca_base;
1447 char *xmalloc_base = NULL;
1448 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1449 OnigStackType *stkp; /* used as any purpose. */
1450 OnigStackIndex si;
1451 OnigStackIndex *repeat_stk;
1452 OnigStackIndex *mem_start_stk, *mem_end_stk;
1453 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1454 int scv;
1455 unsigned char* state_check_buff = msa->state_check_buff;
1456 int num_comb_exp_check = reg->num_comb_exp_check;
1457 #endif
1459 #if USE_TOKEN_THREADED_VM
1460 # define OP_OFFSET 1
1461 # define VM_LOOP JUMP;
1462 # define VM_LOOP_END
1463 # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
1464 # define DEFAULT L_DEFAULT:
1465 # define NEXT sprev = sbegin; JUMP
1466 # define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
1468 RB_GNUC_EXTENSION static const void *oplabels[] = {
1469 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
1470 &&L_OP_END, /* pattern code terminator (success end) */
1472 &&L_OP_EXACT1, /* single byte, N = 1 */
1473 &&L_OP_EXACT2, /* single byte, N = 2 */
1474 &&L_OP_EXACT3, /* single byte, N = 3 */
1475 &&L_OP_EXACT4, /* single byte, N = 4 */
1476 &&L_OP_EXACT5, /* single byte, N = 5 */
1477 &&L_OP_EXACTN, /* single byte */
1478 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
1479 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
1480 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
1481 &&L_OP_EXACTMB2N, /* mb-length = 2 */
1482 &&L_OP_EXACTMB3N, /* mb-length = 3 */
1483 &&L_OP_EXACTMBN, /* other length */
1485 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
1486 &&L_OP_EXACTN_IC, /* single byte, ignore case */
1488 &&L_OP_CCLASS,
1489 &&L_OP_CCLASS_MB,
1490 &&L_OP_CCLASS_MIX,
1491 &&L_OP_CCLASS_NOT,
1492 &&L_OP_CCLASS_MB_NOT,
1493 &&L_OP_CCLASS_MIX_NOT,
1495 &&L_OP_ANYCHAR, /* "." */
1496 &&L_OP_ANYCHAR_ML, /* "." multi-line */
1497 &&L_OP_ANYCHAR_STAR, /* ".*" */
1498 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
1499 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
1500 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
1502 &&L_OP_WORD,
1503 &&L_OP_NOT_WORD,
1504 &&L_OP_WORD_BOUND,
1505 &&L_OP_NOT_WORD_BOUND,
1506 # ifdef USE_WORD_BEGIN_END
1507 &&L_OP_WORD_BEGIN,
1508 &&L_OP_WORD_END,
1509 # else
1510 &&L_DEFAULT,
1511 &&L_DEFAULT,
1512 # endif
1513 &&L_OP_ASCII_WORD,
1514 &&L_OP_NOT_ASCII_WORD,
1515 &&L_OP_ASCII_WORD_BOUND,
1516 &&L_OP_NOT_ASCII_WORD_BOUND,
1517 # ifdef USE_WORD_BEGIN_END
1518 &&L_OP_ASCII_WORD_BEGIN,
1519 &&L_OP_ASCII_WORD_END,
1520 # else
1521 &&L_DEFAULT,
1522 &&L_DEFAULT,
1523 # endif
1525 &&L_OP_BEGIN_BUF,
1526 &&L_OP_END_BUF,
1527 &&L_OP_BEGIN_LINE,
1528 &&L_OP_END_LINE,
1529 &&L_OP_SEMI_END_BUF,
1530 &&L_OP_BEGIN_POSITION,
1532 &&L_OP_BACKREF1,
1533 &&L_OP_BACKREF2,
1534 &&L_OP_BACKREFN,
1535 &&L_OP_BACKREFN_IC,
1536 &&L_OP_BACKREF_MULTI,
1537 &&L_OP_BACKREF_MULTI_IC,
1538 # ifdef USE_BACKREF_WITH_LEVEL
1539 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
1540 # else
1541 &&L_DEFAULT,
1542 # endif
1543 &&L_OP_MEMORY_START,
1544 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
1545 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
1546 # ifdef USE_SUBEXP_CALL
1547 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
1548 # else
1549 &&L_DEFAULT,
1550 # endif
1551 &&L_OP_MEMORY_END,
1552 # ifdef USE_SUBEXP_CALL
1553 &&L_OP_MEMORY_END_REC, /* push marker to stack */
1554 # else
1555 &&L_DEFAULT,
1556 # endif
1558 &&L_OP_KEEP,
1560 &&L_OP_FAIL, /* pop stack and move */
1561 &&L_OP_JUMP,
1562 &&L_OP_PUSH,
1563 &&L_OP_POP,
1564 # ifdef USE_OP_PUSH_OR_JUMP_EXACT
1565 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
1566 # else
1567 &&L_DEFAULT,
1568 # endif
1569 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
1570 &&L_OP_REPEAT, /* {n,m} */
1571 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
1572 &&L_OP_REPEAT_INC,
1573 &&L_OP_REPEAT_INC_NG, /* non greedy */
1574 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
1575 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
1576 &&L_OP_NULL_CHECK_START, /* null loop checker start */
1577 &&L_OP_NULL_CHECK_END, /* null loop checker end */
1578 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1579 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
1580 # else
1581 &&L_DEFAULT,
1582 # endif
1583 # ifdef USE_SUBEXP_CALL
1584 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
1585 # else
1586 &&L_DEFAULT,
1587 # endif
1589 &&L_OP_PUSH_POS, /* (?=...) start */
1590 &&L_OP_POP_POS, /* (?=...) end */
1591 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
1592 &&L_OP_FAIL_POS, /* (?!...) end */
1593 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
1594 &&L_OP_POP_STOP_BT, /* (?>...) end */
1595 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
1596 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
1597 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
1598 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
1599 &&L_OP_ABSENT, /* (?~...) start of inner loop */
1600 &&L_OP_ABSENT_END, /* (?~...) end */
1602 # ifdef USE_SUBEXP_CALL
1603 &&L_OP_CALL, /* \g<name> */
1604 &&L_OP_RETURN,
1605 # else
1606 &&L_DEFAULT,
1607 &&L_DEFAULT,
1608 # endif
1609 &&L_OP_CONDITION,
1611 # ifdef USE_COMBINATION_EXPLOSION_CHECK
1612 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
1613 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
1614 &&L_OP_STATE_CHECK, /* check only */
1615 # else
1616 &&L_DEFAULT,
1617 &&L_DEFAULT,
1618 &&L_DEFAULT,
1619 # endif
1620 # ifdef USE_COMBINATION_EXPLOSION_CHECK
1621 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
1622 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
1623 # else
1624 &&L_DEFAULT,
1625 &&L_DEFAULT,
1626 # endif
1627 /* no need: IS_DYNAMIC_OPTION() == 0 */
1628 # if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
1629 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
1630 &&L_OP_SET_OPTION /* set option */
1631 # else
1632 &&L_DEFAULT,
1633 &&L_DEFAULT
1634 # endif
1636 #else /* USE_TOKEN_THREADED_VM */
1638 # define OP_OFFSET 0
1639 # define VM_LOOP \
1640 while (1) { \
1641 OPCODE_EXEC_HOOK; \
1642 sbegin = s; \
1643 switch (*p++) {
1644 # define VM_LOOP_END } sprev = sbegin; }
1645 # define CASE(x) case x:
1646 # define DEFAULT default:
1647 # define NEXT break
1648 # define JUMP continue; break
1649 #endif /* USE_TOKEN_THREADED_VM */
1652 #ifdef USE_SUBEXP_CALL
1653 /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
1654 etc. Additional space is required. */
1655 # define ADD_NUMMEM 1
1656 #else
1657 /* Stack #0 not is used. */
1658 # define ADD_NUMMEM 0
1659 #endif
1661 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
1663 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
1664 pop_level = reg->stack_pop_level;
1665 num_mem = reg->num_mem;
1666 repeat_stk = (OnigStackIndex* )alloca_base;
1668 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1669 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
1671 OnigStackIndex *pp = mem_start_stk;
1672 for (; pp < repeat_stk + n; pp += 2) {
1673 pp[0] = INVALID_STACK_INDEX;
1674 pp[1] = INVALID_STACK_INDEX;
1677 #ifndef USE_SUBEXP_CALL
1678 mem_start_stk--; /* for index start from 1,
1679 mem_start_stk[1]..mem_start_stk[num_mem] */
1680 mem_end_stk--; /* for index start from 1,
1681 mem_end_stk[1]..mem_end_stk[num_mem] */
1682 #endif
1684 #ifdef ONIG_DEBUG_MATCH
1685 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
1686 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
1687 fprintf(stderr, "size: %d, start offset: %d\n",
1688 (int )(end - str), (int )(sstart - str));
1689 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
1690 #endif
1692 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
1693 best_len = ONIG_MISMATCH;
1694 s = (UChar* )sstart;
1695 pkeep = (UChar* )sstart;
1698 #ifdef ONIG_DEBUG_MATCH
1699 # define OPCODE_EXEC_HOOK \
1700 if (s) { \
1701 UChar *op, *q, *bp, buf[50]; \
1702 int len; \
1703 op = p - OP_OFFSET; \
1704 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
1705 bp = buf; \
1706 q = s; \
1707 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
1708 for (i = 0; i < 7 && q < end; i++) { \
1709 len = enclen(encode, q, end); \
1710 while (len-- > 0) *bp++ = *q++; \
1712 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
1714 xmemcpy(bp, "\"", 1); bp += 1; \
1715 *bp = 0; \
1716 fputs((char* )buf, stderr); \
1717 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
1718 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
1719 stk - stk_base - 1, \
1720 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
1721 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
1722 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
1723 fprintf(stderr, "\n"); \
1725 #else
1726 # define OPCODE_EXEC_HOOK ((void) 0)
1727 #endif
1730 VM_LOOP {
1731 CASE(OP_END) MOP_IN(OP_END);
1732 n = s - sstart;
1733 if (n > best_len) {
1734 OnigRegion* region;
1735 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1736 if (IS_FIND_LONGEST(option)) {
1737 if (n > msa->best_len) {
1738 msa->best_len = n;
1739 msa->best_s = (UChar* )sstart;
1741 else
1742 goto end_best_len;
1744 #endif
1745 best_len = n;
1746 region = msa->region;
1747 if (region) {
1748 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
1749 region->end[0] = s - str;
1750 for (i = 1; i <= num_mem; i++) {
1751 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1752 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1753 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1754 else
1755 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1757 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1758 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1759 : (UChar* )((void* )mem_end_stk[i])) - str;
1761 else {
1762 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1766 #ifdef USE_CAPTURE_HISTORY
1767 if (reg->capture_history != 0) {
1768 int r;
1769 OnigCaptureTreeNode* node;
1771 if (IS_NULL(region->history_root)) {
1772 region->history_root = node = history_node_new();
1773 CHECK_NULL_RETURN_MEMERR(node);
1775 else {
1776 node = region->history_root;
1777 history_tree_clear(node);
1780 node->group = 0;
1781 node->beg = ((pkeep > s) ? s : pkeep) - str;
1782 node->end = s - str;
1784 stkp = stk_base;
1785 r = make_capture_history_tree(region->history_root, &stkp,
1786 stk, (UChar* )str, reg);
1787 if (r < 0) {
1788 best_len = r; /* error code */
1789 goto finish;
1792 #endif /* USE_CAPTURE_HISTORY */
1793 } /* if (region) */
1794 } /* n > best_len */
1796 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1797 end_best_len:
1798 #endif
1799 MOP_OUT;
1801 if (IS_FIND_CONDITION(option)) {
1802 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1803 best_len = ONIG_MISMATCH;
1804 goto fail; /* for retry */
1806 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1807 goto fail; /* for retry */
1811 /* default behavior: return first-matching result. */
1812 goto finish;
1813 NEXT;
1815 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
1816 DATA_ENSURE(1);
1817 if (*p != *s) goto fail;
1818 p++; s++;
1819 MOP_OUT;
1820 NEXT;
1822 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
1824 int len;
1825 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1827 DATA_ENSURE(1);
1828 len = ONIGENC_MBC_CASE_FOLD(encode,
1829 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1830 case_fold_flag,
1831 &s, end, lowbuf);
1832 DATA_ENSURE(0);
1833 q = lowbuf;
1834 while (len-- > 0) {
1835 if (*p != *q) {
1836 goto fail;
1838 p++; q++;
1841 MOP_OUT;
1842 NEXT;
1844 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
1845 DATA_ENSURE(2);
1846 if (*p != *s) goto fail;
1847 p++; s++;
1848 if (*p != *s) goto fail;
1849 sprev = s;
1850 p++; s++;
1851 MOP_OUT;
1852 JUMP;
1854 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
1855 DATA_ENSURE(3);
1856 if (*p != *s) goto fail;
1857 p++; s++;
1858 if (*p != *s) goto fail;
1859 p++; s++;
1860 if (*p != *s) goto fail;
1861 sprev = s;
1862 p++; s++;
1863 MOP_OUT;
1864 JUMP;
1866 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
1867 DATA_ENSURE(4);
1868 if (*p != *s) goto fail;
1869 p++; s++;
1870 if (*p != *s) goto fail;
1871 p++; s++;
1872 if (*p != *s) goto fail;
1873 p++; s++;
1874 if (*p != *s) goto fail;
1875 sprev = s;
1876 p++; s++;
1877 MOP_OUT;
1878 JUMP;
1880 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
1881 DATA_ENSURE(5);
1882 if (*p != *s) goto fail;
1883 p++; s++;
1884 if (*p != *s) goto fail;
1885 p++; s++;
1886 if (*p != *s) goto fail;
1887 p++; s++;
1888 if (*p != *s) goto fail;
1889 p++; s++;
1890 if (*p != *s) goto fail;
1891 sprev = s;
1892 p++; s++;
1893 MOP_OUT;
1894 JUMP;
1896 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
1897 GET_LENGTH_INC(tlen, p);
1898 DATA_ENSURE(tlen);
1899 while (tlen-- > 0) {
1900 if (*p++ != *s++) goto fail;
1902 sprev = s - 1;
1903 MOP_OUT;
1904 JUMP;
1906 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
1908 int len;
1909 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1911 GET_LENGTH_INC(tlen, p);
1912 endp = p + tlen;
1914 while (p < endp) {
1915 sprev = s;
1916 DATA_ENSURE(1);
1917 len = ONIGENC_MBC_CASE_FOLD(encode,
1918 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1919 case_fold_flag,
1920 &s, end, lowbuf);
1921 DATA_ENSURE(0);
1922 q = lowbuf;
1923 while (len-- > 0) {
1924 if (*p != *q) goto fail;
1925 p++; q++;
1930 MOP_OUT;
1931 JUMP;
1933 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
1934 DATA_ENSURE(2);
1935 if (*p != *s) goto fail;
1936 p++; s++;
1937 if (*p != *s) goto fail;
1938 p++; s++;
1939 MOP_OUT;
1940 NEXT;
1942 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
1943 DATA_ENSURE(4);
1944 if (*p != *s) goto fail;
1945 p++; s++;
1946 if (*p != *s) goto fail;
1947 p++; s++;
1948 sprev = s;
1949 if (*p != *s) goto fail;
1950 p++; s++;
1951 if (*p != *s) goto fail;
1952 p++; s++;
1953 MOP_OUT;
1954 JUMP;
1956 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
1957 DATA_ENSURE(6);
1958 if (*p != *s) goto fail;
1959 p++; s++;
1960 if (*p != *s) goto fail;
1961 p++; s++;
1962 if (*p != *s) goto fail;
1963 p++; s++;
1964 if (*p != *s) goto fail;
1965 p++; s++;
1966 sprev = s;
1967 if (*p != *s) goto fail;
1968 p++; s++;
1969 if (*p != *s) goto fail;
1970 p++; s++;
1971 MOP_OUT;
1972 JUMP;
1974 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
1975 GET_LENGTH_INC(tlen, p);
1976 DATA_ENSURE(tlen * 2);
1977 while (tlen-- > 0) {
1978 if (*p != *s) goto fail;
1979 p++; s++;
1980 if (*p != *s) goto fail;
1981 p++; s++;
1983 sprev = s - 2;
1984 MOP_OUT;
1985 JUMP;
1987 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
1988 GET_LENGTH_INC(tlen, p);
1989 DATA_ENSURE(tlen * 3);
1990 while (tlen-- > 0) {
1991 if (*p != *s) goto fail;
1992 p++; s++;
1993 if (*p != *s) goto fail;
1994 p++; s++;
1995 if (*p != *s) goto fail;
1996 p++; s++;
1998 sprev = s - 3;
1999 MOP_OUT;
2000 JUMP;
2002 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2003 GET_LENGTH_INC(tlen, p); /* mb-len */
2004 GET_LENGTH_INC(tlen2, p); /* string len */
2005 tlen2 *= tlen;
2006 DATA_ENSURE(tlen2);
2007 while (tlen2-- > 0) {
2008 if (*p != *s) goto fail;
2009 p++; s++;
2011 sprev = s - tlen;
2012 MOP_OUT;
2013 JUMP;
2015 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2016 DATA_ENSURE(1);
2017 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2018 p += SIZE_BITSET;
2019 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2020 MOP_OUT;
2021 NEXT;
2023 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2024 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2026 cclass_mb:
2027 GET_LENGTH_INC(tlen, p);
2029 OnigCodePoint code;
2030 UChar *ss;
2031 int mb_len;
2033 DATA_ENSURE(1);
2034 mb_len = enclen(encode, s, end);
2035 DATA_ENSURE(mb_len);
2036 ss = s;
2037 s += mb_len;
2038 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2040 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2041 if (! onig_is_in_code_range(p, code)) goto fail;
2042 #else
2043 q = p;
2044 ALIGNMENT_RIGHT(q);
2045 if (! onig_is_in_code_range(q, code)) goto fail;
2046 #endif
2048 p += tlen;
2049 MOP_OUT;
2050 NEXT;
2052 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2053 DATA_ENSURE(1);
2054 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2055 p += SIZE_BITSET;
2056 goto cclass_mb;
2058 else {
2059 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2060 goto fail;
2062 p += SIZE_BITSET;
2063 GET_LENGTH_INC(tlen, p);
2064 p += tlen;
2065 s++;
2067 MOP_OUT;
2068 NEXT;
2070 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2071 DATA_ENSURE(1);
2072 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2073 p += SIZE_BITSET;
2074 s += enclen(encode, s, end);
2075 MOP_OUT;
2076 NEXT;
2078 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2079 DATA_ENSURE(1);
2080 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2081 s++;
2082 GET_LENGTH_INC(tlen, p);
2083 p += tlen;
2084 goto cc_mb_not_success;
2087 cclass_mb_not:
2088 GET_LENGTH_INC(tlen, p);
2090 OnigCodePoint code;
2091 UChar *ss;
2092 int mb_len = enclen(encode, s, end);
2094 if (! DATA_ENSURE_CHECK(mb_len)) {
2095 DATA_ENSURE(1);
2096 s = (UChar* )end;
2097 p += tlen;
2098 goto cc_mb_not_success;
2101 ss = s;
2102 s += mb_len;
2103 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2105 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2106 if (onig_is_in_code_range(p, code)) goto fail;
2107 #else
2108 q = p;
2109 ALIGNMENT_RIGHT(q);
2110 if (onig_is_in_code_range(q, code)) goto fail;
2111 #endif
2113 p += tlen;
2115 cc_mb_not_success:
2116 MOP_OUT;
2117 NEXT;
2119 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2120 DATA_ENSURE(1);
2121 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2122 p += SIZE_BITSET;
2123 goto cclass_mb_not;
2125 else {
2126 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2127 goto fail;
2129 p += SIZE_BITSET;
2130 GET_LENGTH_INC(tlen, p);
2131 p += tlen;
2132 s++;
2134 MOP_OUT;
2135 NEXT;
2137 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2138 DATA_ENSURE(1);
2139 n = enclen(encode, s, end);
2140 DATA_ENSURE(n);
2141 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2142 s += n;
2143 MOP_OUT;
2144 NEXT;
2146 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2147 DATA_ENSURE(1);
2148 n = enclen(encode, s, end);
2149 DATA_ENSURE(n);
2150 s += n;
2151 MOP_OUT;
2152 NEXT;
2154 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2155 while (DATA_ENSURE_CHECK1) {
2156 STACK_PUSH_ALT(p, s, sprev, pkeep);
2157 n = enclen(encode, s, end);
2158 DATA_ENSURE(n);
2159 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2160 sprev = s;
2161 s += n;
2163 MOP_OUT;
2164 JUMP;
2166 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2167 while (DATA_ENSURE_CHECK1) {
2168 STACK_PUSH_ALT(p, s, sprev, pkeep);
2169 n = enclen(encode, s, end);
2170 if (n > 1) {
2171 DATA_ENSURE(n);
2172 sprev = s;
2173 s += n;
2175 else {
2176 sprev = s;
2177 s++;
2180 MOP_OUT;
2181 JUMP;
2183 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2184 while (DATA_ENSURE_CHECK1) {
2185 if (*p == *s) {
2186 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2188 n = enclen(encode, s, end);
2189 DATA_ENSURE(n);
2190 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2191 sprev = s;
2192 s += n;
2194 p++;
2195 MOP_OUT;
2196 NEXT;
2198 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2199 while (DATA_ENSURE_CHECK1) {
2200 if (*p == *s) {
2201 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2203 n = enclen(encode, s, end);
2204 if (n > 1) {
2205 DATA_ENSURE(n);
2206 sprev = s;
2207 s += n;
2209 else {
2210 sprev = s;
2211 s++;
2214 p++;
2215 MOP_OUT;
2216 NEXT;
2218 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2219 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2220 GET_STATE_CHECK_NUM_INC(mem, p);
2221 while (DATA_ENSURE_CHECK1) {
2222 STATE_CHECK_VAL(scv, mem);
2223 if (scv) goto fail;
2225 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2226 n = enclen(encode, s, end);
2227 DATA_ENSURE(n);
2228 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2229 sprev = s;
2230 s += n;
2232 MOP_OUT;
2233 NEXT;
2235 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2236 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2238 GET_STATE_CHECK_NUM_INC(mem, p);
2239 while (DATA_ENSURE_CHECK1) {
2240 STATE_CHECK_VAL(scv, mem);
2241 if (scv) goto fail;
2243 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2244 n = enclen(encode, s, end);
2245 if (n > 1) {
2246 DATA_ENSURE(n);
2247 sprev = s;
2248 s += n;
2250 else {
2251 sprev = s;
2252 s++;
2255 MOP_OUT;
2256 NEXT;
2257 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2259 CASE(OP_WORD) MOP_IN(OP_WORD);
2260 DATA_ENSURE(1);
2261 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2262 goto fail;
2264 s += enclen(encode, s, end);
2265 MOP_OUT;
2266 NEXT;
2268 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2269 DATA_ENSURE(1);
2270 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2271 goto fail;
2273 s += enclen(encode, s, end);
2274 MOP_OUT;
2275 NEXT;
2277 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2278 DATA_ENSURE(1);
2279 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2280 goto fail;
2282 s += enclen(encode, s, end);
2283 MOP_OUT;
2284 NEXT;
2286 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2287 DATA_ENSURE(1);
2288 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2289 goto fail;
2291 s += enclen(encode, s, end);
2292 MOP_OUT;
2293 NEXT;
2295 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2296 if (ON_STR_BEGIN(s)) {
2297 DATA_ENSURE(1);
2298 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2299 goto fail;
2301 else if (ON_STR_END(s)) {
2302 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2303 goto fail;
2305 else {
2306 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2307 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2308 goto fail;
2310 MOP_OUT;
2311 JUMP;
2313 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2314 if (ON_STR_BEGIN(s)) {
2315 DATA_ENSURE(1);
2316 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2317 goto fail;
2319 else if (ON_STR_END(s)) {
2320 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2321 goto fail;
2323 else {
2324 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2325 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2326 goto fail;
2328 MOP_OUT;
2329 JUMP;
2331 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2332 if (ON_STR_BEGIN(s)) {
2333 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2334 goto fail;
2336 else if (ON_STR_END(s)) {
2337 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2338 goto fail;
2340 else {
2341 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2342 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2343 goto fail;
2345 MOP_OUT;
2346 JUMP;
2348 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
2349 if (ON_STR_BEGIN(s)) {
2350 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2351 goto fail;
2353 else if (ON_STR_END(s)) {
2354 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2355 goto fail;
2357 else {
2358 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2359 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2360 goto fail;
2362 MOP_OUT;
2363 JUMP;
2365 #ifdef USE_WORD_BEGIN_END
2366 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
2367 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2368 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2369 MOP_OUT;
2370 JUMP;
2373 goto fail;
2374 NEXT;
2376 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
2377 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2378 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2379 MOP_OUT;
2380 JUMP;
2383 goto fail;
2384 NEXT;
2386 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
2387 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2388 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2389 MOP_OUT;
2390 JUMP;
2393 goto fail;
2394 NEXT;
2396 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
2397 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2398 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2399 MOP_OUT;
2400 JUMP;
2403 goto fail;
2404 NEXT;
2405 #endif
2407 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
2408 if (! ON_STR_BEGIN(s)) goto fail;
2409 if (IS_NOTBOS(msa->options)) goto fail;
2411 MOP_OUT;
2412 JUMP;
2414 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
2415 if (! ON_STR_END(s)) goto fail;
2416 if (IS_NOTEOS(msa->options)) goto fail;
2418 MOP_OUT;
2419 JUMP;
2421 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
2422 if (ON_STR_BEGIN(s)) {
2423 if (IS_NOTBOL(msa->options)) goto fail;
2424 MOP_OUT;
2425 JUMP;
2427 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
2428 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2429 && !(IS_NEWLINE_CRLF(option)
2430 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
2431 #endif
2432 && !ON_STR_END(s)) {
2433 MOP_OUT;
2434 JUMP;
2436 goto fail;
2437 NEXT;
2439 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
2440 if (ON_STR_END(s)) {
2441 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2442 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2443 #endif
2444 if (IS_NOTEOL(msa->options)) goto fail;
2445 MOP_OUT;
2446 JUMP;
2447 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2449 #endif
2451 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2452 MOP_OUT;
2453 JUMP;
2455 goto fail;
2456 NEXT;
2458 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
2459 if (ON_STR_END(s)) {
2460 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2461 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2462 #endif
2463 if (IS_NOTEOL(msa->options)) goto fail;
2464 MOP_OUT;
2465 JUMP;
2466 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2468 #endif
2470 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2471 UChar* ss = s + enclen(encode, s, end);
2472 if (ON_STR_END(ss)) {
2473 MOP_OUT;
2474 JUMP;
2476 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2477 else if (IS_NEWLINE_CRLF(option)
2478 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2479 ss += enclen(encode, ss, end);
2480 if (ON_STR_END(ss)) {
2481 MOP_OUT;
2482 JUMP;
2485 #endif
2487 goto fail;
2488 NEXT;
2490 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
2491 if (s != msa->gpos)
2492 goto fail;
2494 MOP_OUT;
2495 JUMP;
2497 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
2498 GET_MEMNUM_INC(mem, p);
2499 STACK_PUSH_MEM_START(mem, s);
2500 MOP_OUT;
2501 JUMP;
2503 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
2504 GET_MEMNUM_INC(mem, p);
2505 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2506 mem_end_stk[mem] = INVALID_STACK_INDEX;
2507 MOP_OUT;
2508 JUMP;
2510 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
2511 GET_MEMNUM_INC(mem, p);
2512 STACK_PUSH_MEM_END(mem, s);
2513 MOP_OUT;
2514 JUMP;
2516 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
2517 GET_MEMNUM_INC(mem, p);
2518 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2519 MOP_OUT;
2520 JUMP;
2522 CASE(OP_KEEP) MOP_IN(OP_KEEP);
2523 pkeep = s;
2524 MOP_OUT;
2525 JUMP;
2527 #ifdef USE_SUBEXP_CALL
2528 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
2529 GET_MEMNUM_INC(mem, p);
2530 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2531 STACK_PUSH_MEM_END(mem, s);
2532 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2533 MOP_OUT;
2534 JUMP;
2536 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
2537 GET_MEMNUM_INC(mem, p);
2538 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2539 STACK_GET_MEM_START(mem, stkp);
2541 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2542 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2543 else
2544 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2546 STACK_PUSH_MEM_END_MARK(mem);
2547 MOP_OUT;
2548 JUMP;
2549 #endif
2551 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
2552 mem = 1;
2553 goto backref;
2554 NEXT;
2556 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
2557 mem = 2;
2558 goto backref;
2559 NEXT;
2561 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
2562 GET_MEMNUM_INC(mem, p);
2563 backref:
2565 int len;
2566 UChar *pstart, *pend;
2568 /* if you want to remove following line,
2569 you should check in parse and compile time. */
2570 if (mem > num_mem) goto fail;
2571 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2572 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2574 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2575 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2576 else
2577 pstart = (UChar* )((void* )mem_start_stk[mem]);
2579 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2580 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2581 : (UChar* )((void* )mem_end_stk[mem]));
2582 n = pend - pstart;
2583 DATA_ENSURE(n);
2584 sprev = s;
2585 STRING_CMP(pstart, s, n);
2586 while (sprev + (len = enclen(encode, sprev, end)) < s)
2587 sprev += len;
2589 MOP_OUT;
2590 JUMP;
2593 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
2594 GET_MEMNUM_INC(mem, p);
2596 int len;
2597 UChar *pstart, *pend;
2599 /* if you want to remove following line,
2600 you should check in parse and compile time. */
2601 if (mem > num_mem) goto fail;
2602 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2603 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2605 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2606 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2607 else
2608 pstart = (UChar* )((void* )mem_start_stk[mem]);
2610 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2611 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2612 : (UChar* )((void* )mem_end_stk[mem]));
2613 n = pend - pstart;
2614 DATA_ENSURE(n);
2615 sprev = s;
2616 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
2617 while (sprev + (len = enclen(encode, sprev, end)) < s)
2618 sprev += len;
2620 MOP_OUT;
2621 JUMP;
2623 NEXT;
2625 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
2627 int len, is_fail;
2628 UChar *pstart, *pend, *swork;
2630 GET_LENGTH_INC(tlen, p);
2631 for (i = 0; i < tlen; i++) {
2632 GET_MEMNUM_INC(mem, p);
2634 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2635 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2637 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2638 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2639 else
2640 pstart = (UChar* )((void* )mem_start_stk[mem]);
2642 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2643 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2644 : (UChar* )((void* )mem_end_stk[mem]));
2645 n = pend - pstart;
2646 DATA_ENSURE(n);
2647 sprev = s;
2648 swork = s;
2649 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2650 if (is_fail) continue;
2651 s = swork;
2652 while (sprev + (len = enclen(encode, sprev, end)) < s)
2653 sprev += len;
2655 p += (SIZE_MEMNUM * (tlen - i - 1));
2656 break; /* success */
2658 if (i == tlen) goto fail;
2659 MOP_OUT;
2660 JUMP;
2662 NEXT;
2664 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
2666 int len, is_fail;
2667 UChar *pstart, *pend, *swork;
2669 GET_LENGTH_INC(tlen, p);
2670 for (i = 0; i < tlen; i++) {
2671 GET_MEMNUM_INC(mem, p);
2673 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2674 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2676 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2677 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2678 else
2679 pstart = (UChar* )((void* )mem_start_stk[mem]);
2681 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2682 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2683 : (UChar* )((void* )mem_end_stk[mem]));
2684 n = pend - pstart;
2685 DATA_ENSURE(n);
2686 sprev = s;
2687 swork = s;
2688 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
2689 if (is_fail) continue;
2690 s = swork;
2691 while (sprev + (len = enclen(encode, sprev, end)) < s)
2692 sprev += len;
2694 p += (SIZE_MEMNUM * (tlen - i - 1));
2695 break; /* success */
2697 if (i == tlen) goto fail;
2698 MOP_OUT;
2699 JUMP;
2702 #ifdef USE_BACKREF_WITH_LEVEL
2703 CASE(OP_BACKREF_WITH_LEVEL)
2705 int len;
2706 OnigOptionType ic;
2707 LengthType level;
2709 GET_OPTION_INC(ic, p);
2710 GET_LENGTH_INC(level, p);
2711 GET_LENGTH_INC(tlen, p);
2713 sprev = s;
2714 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
2715 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2716 while (sprev + (len = enclen(encode, sprev, end)) < s)
2717 sprev += len;
2719 p += (SIZE_MEMNUM * tlen);
2721 else
2722 goto fail;
2724 MOP_OUT;
2725 JUMP;
2728 #endif
2730 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2731 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
2732 GET_OPTION_INC(option, p);
2733 STACK_PUSH_ALT(p, s, sprev, pkeep);
2734 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2735 MOP_OUT;
2736 JUMP;
2738 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
2739 GET_OPTION_INC(option, p);
2740 MOP_OUT;
2741 JUMP;
2742 #endif
2744 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
2745 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2746 STACK_PUSH_NULL_CHECK_START(mem, s);
2747 MOP_OUT;
2748 JUMP;
2750 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
2752 int isnull;
2754 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2755 STACK_NULL_CHECK(isnull, mem, s);
2756 if (isnull) {
2757 #ifdef ONIG_DEBUG_MATCH
2758 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
2759 (int )mem, (uintptr_t )s, s);
2760 #endif
2761 null_check_found:
2762 /* empty loop founded, skip next instruction */
2763 switch (*p++) {
2764 case OP_JUMP:
2765 case OP_PUSH:
2766 p += SIZE_RELADDR;
2767 break;
2768 case OP_REPEAT_INC:
2769 case OP_REPEAT_INC_NG:
2770 case OP_REPEAT_INC_SG:
2771 case OP_REPEAT_INC_NG_SG:
2772 p += SIZE_MEMNUM;
2773 break;
2774 default:
2775 goto unexpected_bytecode_error;
2776 break;
2780 MOP_OUT;
2781 JUMP;
2783 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2784 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
2786 int isnull;
2788 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2789 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2790 if (isnull) {
2791 # ifdef ONIG_DEBUG_MATCH
2792 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
2793 (int )mem, (uintptr_t )s, s);
2794 # endif
2795 if (isnull == -1) goto fail;
2796 goto null_check_found;
2799 MOP_OUT;
2800 JUMP;
2801 #endif
2803 #ifdef USE_SUBEXP_CALL
2804 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
2805 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2807 int isnull;
2809 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2810 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2811 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2812 # else
2813 STACK_NULL_CHECK_REC(isnull, mem, s);
2814 # endif
2815 if (isnull) {
2816 # ifdef ONIG_DEBUG_MATCH
2817 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
2818 (int )mem, (uintptr_t )s, s);
2819 # endif
2820 if (isnull == -1) goto fail;
2821 goto null_check_found;
2823 else {
2824 STACK_PUSH_NULL_CHECK_END(mem);
2827 MOP_OUT;
2828 JUMP;
2829 #endif
2831 CASE(OP_JUMP) MOP_IN(OP_JUMP);
2832 GET_RELADDR_INC(addr, p);
2833 p += addr;
2834 MOP_OUT;
2835 CHECK_INTERRUPT_IN_MATCH_AT;
2836 JUMP;
2838 CASE(OP_PUSH) MOP_IN(OP_PUSH);
2839 GET_RELADDR_INC(addr, p);
2840 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2841 MOP_OUT;
2842 JUMP;
2844 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2845 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
2846 GET_STATE_CHECK_NUM_INC(mem, p);
2847 STATE_CHECK_VAL(scv, mem);
2848 if (scv) goto fail;
2850 GET_RELADDR_INC(addr, p);
2851 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2852 MOP_OUT;
2853 JUMP;
2855 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2856 GET_STATE_CHECK_NUM_INC(mem, p);
2857 GET_RELADDR_INC(addr, p);
2858 STATE_CHECK_VAL(scv, mem);
2859 if (scv) {
2860 p += addr;
2862 else {
2863 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2865 MOP_OUT;
2866 JUMP;
2868 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
2869 GET_STATE_CHECK_NUM_INC(mem, p);
2870 STATE_CHECK_VAL(scv, mem);
2871 if (scv) goto fail;
2873 STACK_PUSH_STATE_CHECK(s, mem);
2874 MOP_OUT;
2875 JUMP;
2876 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2878 CASE(OP_POP) MOP_IN(OP_POP);
2879 STACK_POP_ONE;
2880 MOP_OUT;
2881 JUMP;
2883 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2884 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2885 GET_RELADDR_INC(addr, p);
2886 if (*p == *s && DATA_ENSURE_CHECK1) {
2887 p++;
2888 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2889 MOP_OUT;
2890 JUMP;
2892 p += (addr + 1);
2893 MOP_OUT;
2894 JUMP;
2895 #endif
2897 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2898 GET_RELADDR_INC(addr, p);
2899 if (*p == *s) {
2900 p++;
2901 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2902 MOP_OUT;
2903 JUMP;
2905 p++;
2906 MOP_OUT;
2907 JUMP;
2909 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
2911 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2912 GET_RELADDR_INC(addr, p);
2914 STACK_ENSURE(1);
2915 repeat_stk[mem] = GET_STACK_INDEX(stk);
2916 STACK_PUSH_REPEAT(mem, p);
2918 if (reg->repeat_range[mem].lower == 0) {
2919 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2922 MOP_OUT;
2923 JUMP;
2925 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
2927 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2928 GET_RELADDR_INC(addr, p);
2930 STACK_ENSURE(1);
2931 repeat_stk[mem] = GET_STACK_INDEX(stk);
2932 STACK_PUSH_REPEAT(mem, p);
2934 if (reg->repeat_range[mem].lower == 0) {
2935 STACK_PUSH_ALT(p, s, sprev, pkeep);
2936 p += addr;
2939 MOP_OUT;
2940 JUMP;
2942 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
2943 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2944 si = repeat_stk[mem];
2945 stkp = STACK_AT(si);
2947 repeat_inc:
2948 stkp->u.repeat.count++;
2949 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2950 /* end of repeat. Nothing to do. */
2952 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2953 STACK_PUSH_ALT(p, s, sprev, pkeep);
2954 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2956 else {
2957 p = stkp->u.repeat.pcode;
2959 STACK_PUSH_REPEAT_INC(si);
2960 MOP_OUT;
2961 CHECK_INTERRUPT_IN_MATCH_AT;
2962 JUMP;
2964 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
2965 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2966 STACK_GET_REPEAT(mem, stkp);
2967 si = GET_STACK_INDEX(stkp);
2968 goto repeat_inc;
2969 NEXT;
2971 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
2972 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2973 si = repeat_stk[mem];
2974 stkp = STACK_AT(si);
2976 repeat_inc_ng:
2977 stkp->u.repeat.count++;
2978 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2979 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2980 UChar* pcode = stkp->u.repeat.pcode;
2982 STACK_PUSH_REPEAT_INC(si);
2983 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
2985 else {
2986 p = stkp->u.repeat.pcode;
2987 STACK_PUSH_REPEAT_INC(si);
2990 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2991 STACK_PUSH_REPEAT_INC(si);
2993 MOP_OUT;
2994 CHECK_INTERRUPT_IN_MATCH_AT;
2995 JUMP;
2997 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
2998 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2999 STACK_GET_REPEAT(mem, stkp);
3000 si = GET_STACK_INDEX(stkp);
3001 goto repeat_inc_ng;
3002 NEXT;
3004 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3005 STACK_PUSH_POS(s, sprev, pkeep);
3006 MOP_OUT;
3007 JUMP;
3009 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3011 STACK_POS_END(stkp);
3012 s = stkp->u.state.pstr;
3013 sprev = stkp->u.state.pstr_prev;
3015 MOP_OUT;
3016 JUMP;
3018 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3019 GET_RELADDR_INC(addr, p);
3020 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3021 MOP_OUT;
3022 JUMP;
3024 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3025 STACK_POP_TIL_POS_NOT;
3026 goto fail;
3027 NEXT;
3029 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3030 STACK_PUSH_STOP_BT;
3031 MOP_OUT;
3032 JUMP;
3034 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3035 STACK_STOP_BT_END;
3036 MOP_OUT;
3037 JUMP;
3039 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3040 GET_LENGTH_INC(tlen, p);
3041 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3042 if (IS_NULL(s)) goto fail;
3043 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3044 MOP_OUT;
3045 JUMP;
3047 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3048 GET_RELADDR_INC(addr, p);
3049 GET_LENGTH_INC(tlen, p);
3050 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3051 if (IS_NULL(q)) {
3052 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3053 If you want to change to fail, replace following line. */
3054 p += addr;
3055 /* goto fail; */
3057 else {
3058 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3059 s = q;
3060 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3062 MOP_OUT;
3063 JUMP;
3065 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3066 STACK_POP_TIL_LOOK_BEHIND_NOT;
3067 goto fail;
3068 NEXT;
3070 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3071 /* Save the absent-start-pos and the original end-pos. */
3072 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3073 MOP_OUT;
3074 JUMP;
3076 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3078 const UChar* aend = ABSENT_END_POS;
3079 UChar* absent;
3080 UChar* selfp = p - 1;
3082 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3083 GET_RELADDR_INC(addr, p);
3084 #ifdef ONIG_DEBUG_MATCH
3085 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3086 #endif
3087 if ((absent > aend) && (s > absent)) {
3088 /* An empty match occurred in (?~...) at the start point.
3089 * Never match. */
3090 STACK_POP;
3091 goto fail;
3093 else if ((s >= aend) && (s > absent)) {
3094 if (s > aend) {
3095 /* Only one (or less) character matched in the last iteration.
3096 * This is not a possible point. */
3097 goto fail;
3099 /* All possible points were found. Try matching after (?~...). */
3100 DATA_ENSURE(0);
3101 p += addr;
3103 else {
3104 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3105 n = enclen(encode, s, end);
3106 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3107 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3108 STACK_PUSH_ABSENT;
3109 ABSENT_END_POS = aend;
3112 MOP_OUT;
3113 JUMP;
3115 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3116 /* The pattern inside (?~...) was matched.
3117 * Set the end-pos temporary and go to next iteration. */
3118 if (sprev < ABSENT_END_POS)
3119 ABSENT_END_POS = sprev;
3120 #ifdef ONIG_DEBUG_MATCH
3121 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3122 #endif
3123 STACK_POP_TIL_ABSENT;
3124 goto fail;
3125 NEXT;
3127 #ifdef USE_SUBEXP_CALL
3128 CASE(OP_CALL) MOP_IN(OP_CALL);
3129 GET_ABSADDR_INC(addr, p);
3130 STACK_PUSH_CALL_FRAME(p);
3131 p = reg->p + addr;
3132 MOP_OUT;
3133 JUMP;
3135 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3136 STACK_RETURN(p);
3137 STACK_PUSH_RETURN;
3138 MOP_OUT;
3139 JUMP;
3140 #endif
3142 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3143 GET_MEMNUM_INC(mem, p);
3144 GET_RELADDR_INC(addr, p);
3145 if ((mem > num_mem) ||
3146 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3147 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3148 p += addr;
3150 MOP_OUT;
3151 JUMP;
3153 CASE(OP_FINISH)
3154 goto finish;
3155 NEXT;
3157 CASE(OP_FAIL)
3158 if (0) {
3159 /* fall */
3160 fail:
3161 MOP_OUT;
3163 MOP_IN(OP_FAIL);
3164 STACK_POP;
3165 p = stk->u.state.pcode;
3166 s = stk->u.state.pstr;
3167 sprev = stk->u.state.pstr_prev;
3168 pkeep = stk->u.state.pkeep;
3170 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3171 if (stk->u.state.state_check != 0) {
3172 stk->type = STK_STATE_CHECK_MARK;
3173 stk++;
3175 #endif
3177 MOP_OUT;
3178 JUMP;
3180 DEFAULT
3181 goto bytecode_error;
3182 } VM_LOOP_END
3184 finish:
3185 STACK_SAVE;
3186 if (xmalloc_base) xfree(xmalloc_base);
3187 return best_len;
3189 #ifdef ONIG_DEBUG
3190 stack_error:
3191 STACK_SAVE;
3192 if (xmalloc_base) xfree(xmalloc_base);
3193 return ONIGERR_STACK_BUG;
3194 #endif
3196 bytecode_error:
3197 STACK_SAVE;
3198 if (xmalloc_base) xfree(xmalloc_base);
3199 return ONIGERR_UNDEFINED_BYTECODE;
3201 unexpected_bytecode_error:
3202 STACK_SAVE;
3203 if (xmalloc_base) xfree(xmalloc_base);
3204 return ONIGERR_UNEXPECTED_BYTECODE;
3208 static UChar*
3209 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3210 const UChar* text, const UChar* text_end, UChar* text_range)
3212 UChar *t, *p, *s, *end;
3214 end = (UChar* )text_end;
3215 end -= target_end - target - 1;
3216 if (end > text_range)
3217 end = text_range;
3219 s = (UChar* )text;
3221 if (enc->max_enc_len == enc->min_enc_len) {
3222 int n = enc->max_enc_len;
3224 while (s < end) {
3225 if (*s == *target) {
3226 p = s + 1;
3227 t = target + 1;
3228 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3229 return s;
3231 s += n;
3233 return (UChar* )NULL;
3235 while (s < end) {
3236 if (*s == *target) {
3237 p = s + 1;
3238 t = target + 1;
3239 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3240 return s;
3242 s += enclen(enc, s, text_end);
3245 return (UChar* )NULL;
3248 static int
3249 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
3250 const UChar* t, const UChar* tend,
3251 const UChar* p, const UChar* end)
3253 int lowlen;
3254 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3256 while (t < tend) {
3257 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
3258 q = lowbuf;
3259 while (lowlen > 0) {
3260 if (*t++ != *q++) return 0;
3261 lowlen--;
3265 return 1;
3268 static UChar*
3269 slow_search_ic(OnigEncoding enc, int case_fold_flag,
3270 UChar* target, UChar* target_end,
3271 const UChar* text, const UChar* text_end, UChar* text_range)
3273 UChar *s, *end;
3275 end = (UChar* )text_end;
3276 end -= target_end - target - 1;
3277 if (end > text_range)
3278 end = text_range;
3280 s = (UChar* )text;
3282 while (s < end) {
3283 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3284 s, text_end))
3285 return s;
3287 s += enclen(enc, s, text_end);
3290 return (UChar* )NULL;
3293 static UChar*
3294 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3295 const UChar* text, const UChar* adjust_text,
3296 const UChar* text_end, const UChar* text_start)
3298 UChar *t, *p, *s;
3300 s = (UChar* )text_end;
3301 s -= (target_end - target);
3302 if (s > text_start)
3303 s = (UChar* )text_start;
3304 else
3305 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3307 while (s >= text) {
3308 if (*s == *target) {
3309 p = s + 1;
3310 t = target + 1;
3311 while (t < target_end) {
3312 if (*t != *p++)
3313 break;
3314 t++;
3316 if (t == target_end)
3317 return s;
3319 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3322 return (UChar* )NULL;
3325 static UChar*
3326 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
3327 UChar* target, UChar* target_end,
3328 const UChar* text, const UChar* adjust_text,
3329 const UChar* text_end, const UChar* text_start)
3331 UChar *s;
3333 s = (UChar* )text_end;
3334 s -= (target_end - target);
3335 if (s > text_start)
3336 s = (UChar* )text_start;
3337 else
3338 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3340 while (s >= text) {
3341 if (str_lower_case_match(enc, case_fold_flag,
3342 target, target_end, s, text_end))
3343 return s;
3345 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3348 return (UChar* )NULL;
3351 #ifndef USE_SUNDAY_QUICK_SEARCH
3352 /* Boyer-Moore-Horspool search applied to a multibyte string */
3353 static UChar*
3354 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3355 const UChar* text, const UChar* text_end,
3356 const UChar* text_range)
3358 const UChar *s, *se, *t, *p, *end;
3359 const UChar *tail;
3360 ptrdiff_t skip, tlen1;
3362 # ifdef ONIG_DEBUG_SEARCH
3363 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3364 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3365 # endif
3367 tail = target_end - 1;
3368 tlen1 = tail - target;
3369 end = text_range;
3370 if (end + tlen1 > text_end)
3371 end = text_end - tlen1;
3373 s = text;
3375 if (IS_NULL(reg->int_map)) {
3376 while (s < end) {
3377 p = se = s + tlen1;
3378 t = tail;
3379 while (*p == *t) {
3380 if (t == target) return (UChar* )s;
3381 p--; t--;
3383 skip = reg->map[*se];
3384 t = s;
3385 do {
3386 s += enclen(reg->enc, s, end);
3387 } while ((s - t) < skip && s < end);
3390 else {
3391 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3392 while (s < end) {
3393 p = se = s + tlen1;
3394 t = tail;
3395 while (*p == *t) {
3396 if (t == target) return (UChar* )s;
3397 p--; t--;
3399 skip = reg->int_map[*se];
3400 t = s;
3401 do {
3402 s += enclen(reg->enc, s, end);
3403 } while ((s - t) < skip && s < end);
3405 # endif
3408 return (UChar* )NULL;
3411 /* Boyer-Moore-Horspool search */
3412 static UChar*
3413 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3414 const UChar* text, const UChar* text_end, const UChar* text_range)
3416 const UChar *s, *t, *p, *end;
3417 const UChar *tail;
3419 # ifdef ONIG_DEBUG_SEARCH
3420 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3421 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3422 # endif
3424 end = text_range + (target_end - target) - 1;
3425 if (end > text_end)
3426 end = text_end;
3428 tail = target_end - 1;
3429 s = text + (target_end - target) - 1;
3430 if (IS_NULL(reg->int_map)) {
3431 while (s < end) {
3432 p = s;
3433 t = tail;
3434 # ifdef ONIG_DEBUG_SEARCH
3435 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
3436 (intptr_t )(s - text), s);
3437 # endif
3438 while (*p == *t) {
3439 if (t == target) return (UChar* )p;
3440 p--; t--;
3442 s += reg->map[*s];
3445 else { /* see int_map[] */
3446 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3447 while (s < end) {
3448 p = s;
3449 t = tail;
3450 while (*p == *t) {
3451 if (t == target) return (UChar* )p;
3452 p--; t--;
3454 s += reg->int_map[*s];
3456 # endif
3458 return (UChar* )NULL;
3461 /* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
3462 static UChar*
3463 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3464 const UChar* text, const UChar* text_end,
3465 const UChar* text_range)
3467 const UChar *s, *se, *t, *end;
3468 const UChar *tail;
3469 ptrdiff_t skip, tlen1;
3470 OnigEncoding enc = reg->enc;
3471 int case_fold_flag = reg->case_fold_flag;
3473 # ifdef ONIG_DEBUG_SEARCH
3474 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3475 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3476 # endif
3478 tail = target_end - 1;
3479 tlen1 = tail - target;
3480 end = text_range;
3481 if (end + tlen1 > text_end)
3482 end = text_end - tlen1;
3484 s = text;
3486 if (IS_NULL(reg->int_map)) {
3487 while (s < end) {
3488 se = s + tlen1;
3489 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3490 s, se + 1))
3491 return (UChar* )s;
3492 skip = reg->map[*se];
3493 t = s;
3494 do {
3495 s += enclen(reg->enc, s, end);
3496 } while ((s - t) < skip && s < end);
3499 else {
3500 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3501 while (s < end) {
3502 se = s + tlen1;
3503 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3504 s, se + 1))
3505 return (UChar* )s;
3506 skip = reg->int_map[*se];
3507 t = s;
3508 do {
3509 s += enclen(reg->enc, s, end);
3510 } while ((s - t) < skip && s < end);
3512 # endif
3515 return (UChar* )NULL;
3518 /* Boyer-Moore-Horspool search (ignore case) */
3519 static UChar*
3520 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3521 const UChar* text, const UChar* text_end, const UChar* text_range)
3523 const UChar *s, *p, *end;
3524 const UChar *tail;
3525 OnigEncoding enc = reg->enc;
3526 int case_fold_flag = reg->case_fold_flag;
3528 # ifdef ONIG_DEBUG_SEARCH
3529 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3530 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3531 # endif
3533 end = text_range + (target_end - target) - 1;
3534 if (end > text_end)
3535 end = text_end;
3537 tail = target_end - 1;
3538 s = text + (target_end - target) - 1;
3539 if (IS_NULL(reg->int_map)) {
3540 while (s < end) {
3541 p = s - (target_end - target) + 1;
3542 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3543 p, s + 1))
3544 return (UChar* )p;
3545 s += reg->map[*s];
3548 else { /* see int_map[] */
3549 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3550 while (s < end) {
3551 p = s - (target_end - target) + 1;
3552 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3553 p, s + 1))
3554 return (UChar* )p;
3555 s += reg->int_map[*s];
3557 # endif
3559 return (UChar* )NULL;
3562 #else /* USE_SUNDAY_QUICK_SEARCH */
3564 /* Sunday's quick search applied to a multibyte string */
3565 static UChar*
3566 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3567 const UChar* text, const UChar* text_end,
3568 const UChar* text_range)
3570 const UChar *s, *se, *t, *p, *end;
3571 const UChar *tail;
3572 ptrdiff_t skip, tlen1;
3573 OnigEncoding enc = reg->enc;
3575 # ifdef ONIG_DEBUG_SEARCH
3576 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3577 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3578 # endif
3580 tail = target_end - 1;
3581 tlen1 = tail - target;
3582 end = text_range;
3583 if (end + tlen1 > text_end)
3584 end = text_end - tlen1;
3586 s = text;
3588 if (IS_NULL(reg->int_map)) {
3589 while (s < end) {
3590 p = se = s + tlen1;
3591 t = tail;
3592 while (*p == *t) {
3593 if (t == target) return (UChar* )s;
3594 p--; t--;
3596 if (s + 1 >= end) break;
3597 skip = reg->map[se[1]];
3598 t = s;
3599 do {
3600 s += enclen(enc, s, end);
3601 } while ((s - t) < skip && s < end);
3604 else {
3605 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3606 while (s < end) {
3607 p = se = s + tlen1;
3608 t = tail;
3609 while (*p == *t) {
3610 if (t == target) return (UChar* )s;
3611 p--; t--;
3613 if (s + 1 >= end) break;
3614 skip = reg->int_map[se[1]];
3615 t = s;
3616 do {
3617 s += enclen(enc, s, end);
3618 } while ((s - t) < skip && s < end);
3620 # endif
3623 return (UChar* )NULL;
3626 /* Sunday's quick search */
3627 static UChar*
3628 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3629 const UChar* text, const UChar* text_end, const UChar* text_range)
3631 const UChar *s, *t, *p, *end;
3632 const UChar *tail;
3633 ptrdiff_t tlen1;
3635 # ifdef ONIG_DEBUG_SEARCH
3636 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3637 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3638 # endif
3640 tail = target_end - 1;
3641 tlen1 = tail - target;
3642 end = text_range + tlen1;
3643 if (end > text_end)
3644 end = text_end;
3646 s = text + tlen1;
3647 if (IS_NULL(reg->int_map)) {
3648 while (s < end) {
3649 p = s;
3650 t = tail;
3651 while (*p == *t) {
3652 if (t == target) return (UChar* )p;
3653 p--; t--;
3655 if (s + 1 >= end) break;
3656 s += reg->map[s[1]];
3659 else { /* see int_map[] */
3660 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3661 while (s < end) {
3662 p = s;
3663 t = tail;
3664 while (*p == *t) {
3665 if (t == target) return (UChar* )p;
3666 p--; t--;
3668 if (s + 1 >= end) break;
3669 s += reg->int_map[s[1]];
3671 # endif
3673 return (UChar* )NULL;
3676 /* Sunday's quick search applied to a multibyte string (ignore case) */
3677 static UChar*
3678 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3679 const UChar* text, const UChar* text_end,
3680 const UChar* text_range)
3682 const UChar *s, *se, *t, *end;
3683 const UChar *tail;
3684 ptrdiff_t skip, tlen1;
3685 OnigEncoding enc = reg->enc;
3686 int case_fold_flag = reg->case_fold_flag;
3688 # ifdef ONIG_DEBUG_SEARCH
3689 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3690 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3691 # endif
3693 tail = target_end - 1;
3694 tlen1 = tail - target;
3695 end = text_range;
3696 if (end + tlen1 > text_end)
3697 end = text_end - tlen1;
3699 s = text;
3701 if (IS_NULL(reg->int_map)) {
3702 while (s < end) {
3703 se = s + tlen1;
3704 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3705 s, se + 1))
3706 return (UChar* )s;
3707 if (s + 1 >= end) break;
3708 skip = reg->map[se[1]];
3709 t = s;
3710 do {
3711 s += enclen(enc, s, end);
3712 } while ((s - t) < skip && s < end);
3715 else {
3716 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3717 while (s < end) {
3718 se = s + tlen1;
3719 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3720 s, se + 1))
3721 return (UChar* )s;
3722 if (s + 1 >= end) break;
3723 skip = reg->int_map[se[1]];
3724 t = s;
3725 do {
3726 s += enclen(enc, s, end);
3727 } while ((s - t) < skip && s < end);
3729 # endif
3732 return (UChar* )NULL;
3735 /* Sunday's quick search (ignore case) */
3736 static UChar*
3737 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3738 const UChar* text, const UChar* text_end, const UChar* text_range)
3740 const UChar *s, *p, *end;
3741 const UChar *tail;
3742 ptrdiff_t tlen1;
3743 OnigEncoding enc = reg->enc;
3744 int case_fold_flag = reg->case_fold_flag;
3746 # ifdef ONIG_DEBUG_SEARCH
3747 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3748 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3749 # endif
3751 tail = target_end - 1;
3752 tlen1 = tail - target;
3753 end = text_range + tlen1;
3754 if (end > text_end)
3755 end = text_end;
3757 s = text + tlen1;
3758 if (IS_NULL(reg->int_map)) {
3759 while (s < end) {
3760 p = s - tlen1;
3761 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3762 p, s + 1))
3763 return (UChar* )p;
3764 if (s + 1 >= end) break;
3765 s += reg->map[s[1]];
3768 else { /* see int_map[] */
3769 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3770 while (s < end) {
3771 p = s - tlen1;
3772 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3773 p, s + 1))
3774 return (UChar* )p;
3775 if (s + 1 >= end) break;
3776 s += reg->int_map[s[1]];
3778 # endif
3780 return (UChar* )NULL;
3782 #endif /* USE_SUNDAY_QUICK_SEARCH */
3784 #ifdef USE_INT_MAP_BACKWARD
3785 static int
3786 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3787 int** skip)
3789 int i, len;
3791 if (IS_NULL(*skip)) {
3792 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3793 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3796 len = (int )(end - s);
3797 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3798 (*skip)[i] = len;
3800 for (i = len - 1; i > 0; i--)
3801 (*skip)[s[i]] = i;
3803 return 0;
3806 static UChar*
3807 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3808 const UChar* text, const UChar* adjust_text,
3809 const UChar* text_end, const UChar* text_start)
3811 const UChar *s, *t, *p;
3813 s = text_end - (target_end - target);
3814 if (text_start < s)
3815 s = text_start;
3816 else
3817 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3819 while (s >= text) {
3820 p = s;
3821 t = target;
3822 while (t < target_end && *p == *t) {
3823 p++; t++;
3825 if (t == target_end)
3826 return (UChar* )s;
3828 s -= reg->int_map_backward[*s];
3829 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3832 return (UChar* )NULL;
3834 #endif
3836 static UChar*
3837 map_search(OnigEncoding enc, UChar map[],
3838 const UChar* text, const UChar* text_range, const UChar* text_end)
3840 const UChar *s = text;
3842 while (s < text_range) {
3843 if (map[*s]) return (UChar* )s;
3845 s += enclen(enc, s, text_end);
3847 return (UChar* )NULL;
3850 static UChar*
3851 map_search_backward(OnigEncoding enc, UChar map[],
3852 const UChar* text, const UChar* adjust_text,
3853 const UChar* text_start, const UChar* text_end)
3855 const UChar *s = text_start;
3857 while (s >= text) {
3858 if (map[*s]) return (UChar* )s;
3860 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3862 return (UChar* )NULL;
3865 extern OnigPosition
3866 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3867 OnigOptionType option)
3869 ptrdiff_t r;
3870 UChar *prev;
3871 OnigMatchArg msa;
3873 MATCH_ARG_INIT(msa, option, region, at, at);
3874 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3876 ptrdiff_t offset = at - str;
3877 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3879 #endif
3881 if (region) {
3882 r = onig_region_resize_clear(region, reg->num_mem + 1);
3884 else
3885 r = 0;
3887 if (r == 0) {
3888 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
3889 r = match_at(reg, str, end,
3890 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3891 end,
3892 #endif
3893 at, prev, &msa);
3896 MATCH_ARG_FREE(msa);
3897 return r;
3900 static int
3901 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3902 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3904 UChar *p, *pprev = (UChar* )NULL;
3906 #ifdef ONIG_DEBUG_SEARCH
3907 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
3908 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
3909 #endif
3911 p = s;
3912 if (reg->dmin > 0) {
3913 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3914 p += reg->dmin;
3916 else {
3917 UChar *q = p + reg->dmin;
3919 if (q >= end) return 0; /* fail */
3920 while (p < q) p += enclen(reg->enc, p, end);
3924 retry:
3925 switch (reg->optimize) {
3926 case ONIG_OPTIMIZE_EXACT:
3927 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3928 break;
3929 case ONIG_OPTIMIZE_EXACT_IC:
3930 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3931 reg->exact, reg->exact_end, p, end, range);
3932 break;
3934 case ONIG_OPTIMIZE_EXACT_BM:
3935 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3936 break;
3938 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3939 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3940 break;
3942 case ONIG_OPTIMIZE_EXACT_BM_IC:
3943 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
3944 break;
3946 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
3947 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
3948 break;
3950 case ONIG_OPTIMIZE_MAP:
3951 p = map_search(reg->enc, reg->map, p, range, end);
3952 break;
3955 if (p && p < range) {
3956 if (p - reg->dmin < s) {
3957 retry_gate:
3958 pprev = p;
3959 p += enclen(reg->enc, p, end);
3960 goto retry;
3963 if (reg->sub_anchor) {
3964 UChar* prev;
3966 switch (reg->sub_anchor) {
3967 case ANCHOR_BEGIN_LINE:
3968 if (!ON_STR_BEGIN(p)) {
3969 prev = onigenc_get_prev_char_head(reg->enc,
3970 (pprev ? pprev : str), p, end);
3971 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
3972 goto retry_gate;
3974 break;
3976 case ANCHOR_END_LINE:
3977 if (ON_STR_END(p)) {
3978 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3979 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3980 (pprev ? pprev : str), p);
3981 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
3982 goto retry_gate;
3983 #endif
3985 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
3986 goto retry_gate;
3987 break;
3991 if (reg->dmax == 0) {
3992 *low = p;
3993 if (low_prev) {
3994 if (*low > s)
3995 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
3996 else
3997 *low_prev = onigenc_get_prev_char_head(reg->enc,
3998 (pprev ? pprev : str), p, end);
4001 else {
4002 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4003 if (p < str + reg->dmax) {
4004 *low = (UChar* )str;
4005 if (low_prev)
4006 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4008 else {
4009 *low = p - reg->dmax;
4010 if (*low > s) {
4011 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4012 *low, end, (const UChar** )low_prev);
4013 if (low_prev && IS_NULL(*low_prev))
4014 *low_prev = onigenc_get_prev_char_head(reg->enc,
4015 (pprev ? pprev : s), *low, end);
4017 else {
4018 if (low_prev)
4019 *low_prev = onigenc_get_prev_char_head(reg->enc,
4020 (pprev ? pprev : str), *low, end);
4025 /* no needs to adjust *high, *high is used as range check only */
4026 *high = p - reg->dmin;
4028 #ifdef ONIG_DEBUG_SEARCH
4029 fprintf(stderr,
4030 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4031 *low - str, *high - str, reg->dmin, reg->dmax);
4032 #endif
4033 return 1; /* success */
4036 return 0; /* fail */
4039 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4041 static int
4042 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4043 UChar* s, const UChar* range, UChar* adjrange,
4044 UChar** low, UChar** high)
4046 UChar *p;
4048 range += reg->dmin;
4049 p = s;
4051 retry:
4052 switch (reg->optimize) {
4053 case ONIG_OPTIMIZE_EXACT:
4054 exact_method:
4055 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4056 range, adjrange, end, p);
4057 break;
4059 case ONIG_OPTIMIZE_EXACT_IC:
4060 case ONIG_OPTIMIZE_EXACT_BM_IC:
4061 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4062 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4063 reg->exact, reg->exact_end,
4064 range, adjrange, end, p);
4065 break;
4067 case ONIG_OPTIMIZE_EXACT_BM:
4068 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4069 #ifdef USE_INT_MAP_BACKWARD
4070 if (IS_NULL(reg->int_map_backward)) {
4071 int r;
4072 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4073 goto exact_method;
4075 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4076 &(reg->int_map_backward));
4077 if (r) return r;
4079 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4080 end, p);
4081 #else
4082 goto exact_method;
4083 #endif
4084 break;
4086 case ONIG_OPTIMIZE_MAP:
4087 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4088 break;
4091 if (p) {
4092 if (reg->sub_anchor) {
4093 UChar* prev;
4095 switch (reg->sub_anchor) {
4096 case ANCHOR_BEGIN_LINE:
4097 if (!ON_STR_BEGIN(p)) {
4098 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4099 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4100 p = prev;
4101 goto retry;
4104 break;
4106 case ANCHOR_END_LINE:
4107 if (ON_STR_END(p)) {
4108 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4109 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4110 if (IS_NULL(prev)) goto fail;
4111 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4112 p = prev;
4113 goto retry;
4115 #endif
4117 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4118 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4119 if (IS_NULL(p)) goto fail;
4120 goto retry;
4122 break;
4126 /* no needs to adjust *high, *high is used as range check only */
4127 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4128 *low = p - reg->dmax;
4129 *high = p - reg->dmin;
4130 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4133 #ifdef ONIG_DEBUG_SEARCH
4134 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4135 (int )(*low - str), (int )(*high - str));
4136 #endif
4137 return 1; /* success */
4140 fail:
4141 #ifdef ONIG_DEBUG_SEARCH
4142 fprintf(stderr, "backward_search_range: fail.\n");
4143 #endif
4144 return 0; /* fail */
4148 extern OnigPosition
4149 onig_search(regex_t* reg, const UChar* str, const UChar* end,
4150 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4152 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4155 extern OnigPosition
4156 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4157 const UChar* global_pos,
4158 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4160 ptrdiff_t r;
4161 UChar *s, *prev;
4162 OnigMatchArg msa;
4163 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4164 const UChar *orig_start = start;
4165 const UChar *orig_range = range;
4166 #endif
4168 #ifdef ONIG_DEBUG_SEARCH
4169 fprintf(stderr,
4170 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4171 (uintptr_t )str, str, end - str, start - str, range - str);
4172 #endif
4174 if (region) {
4175 r = onig_region_resize_clear(region, reg->num_mem + 1);
4176 if (r) goto finish_no_msa;
4179 if (start > end || start < str) goto mismatch_no_msa;
4182 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4183 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4184 # define MATCH_AND_RETURN_CHECK(upper_range) \
4185 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4186 if (r != ONIG_MISMATCH) {\
4187 if (r >= 0) {\
4188 if (! IS_FIND_LONGEST(reg->options)) {\
4189 goto match;\
4192 else goto finish; /* error */ \
4194 # else
4195 # define MATCH_AND_RETURN_CHECK(upper_range) \
4196 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4197 if (r != ONIG_MISMATCH) {\
4198 if (r >= 0) {\
4199 goto match;\
4201 else goto finish; /* error */ \
4203 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4204 #else
4205 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4206 # define MATCH_AND_RETURN_CHECK(none) \
4207 r = match_at(reg, str, end, s, prev, &msa);\
4208 if (r != ONIG_MISMATCH) {\
4209 if (r >= 0) {\
4210 if (! IS_FIND_LONGEST(reg->options)) {\
4211 goto match;\
4214 else goto finish; /* error */ \
4216 # else
4217 # define MATCH_AND_RETURN_CHECK(none) \
4218 r = match_at(reg, str, end, s, prev, &msa);\
4219 if (r != ONIG_MISMATCH) {\
4220 if (r >= 0) {\
4221 goto match;\
4223 else goto finish; /* error */ \
4225 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4226 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
4229 /* anchor optimize: resume search range */
4230 if (reg->anchor != 0 && str < end) {
4231 UChar *min_semi_end, *max_semi_end;
4233 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4234 /* search start-position only */
4235 begin_position:
4236 if (range > start)
4238 if (global_pos > start)
4240 if (global_pos < range)
4241 range = global_pos + 1;
4243 else
4244 range = start + 1;
4246 else
4247 range = start;
4249 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4250 /* search str-position only */
4251 if (range > start) {
4252 if (start != str) goto mismatch_no_msa;
4253 range = str + 1;
4255 else {
4256 if (range <= str) {
4257 start = str;
4258 range = str;
4260 else
4261 goto mismatch_no_msa;
4264 else if (reg->anchor & ANCHOR_END_BUF) {
4265 min_semi_end = max_semi_end = (UChar* )end;
4267 end_buf:
4268 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
4269 goto mismatch_no_msa;
4271 if (range > start) {
4272 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
4273 start = min_semi_end - reg->anchor_dmax;
4274 if (start < end)
4275 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
4277 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4278 range = max_semi_end - reg->anchor_dmin + 1;
4281 if (start > range) goto mismatch_no_msa;
4282 /* If start == range, match with empty at end.
4283 Backward search is used. */
4285 else {
4286 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
4287 range = min_semi_end - reg->anchor_dmax;
4289 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
4290 start = max_semi_end - reg->anchor_dmin;
4291 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
4293 if (range > start) goto mismatch_no_msa;
4296 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
4297 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
4299 max_semi_end = (UChar* )end;
4300 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4301 min_semi_end = pre_end;
4303 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4304 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
4305 if (IS_NOT_NULL(pre_end) &&
4306 IS_NEWLINE_CRLF(reg->options) &&
4307 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4308 min_semi_end = pre_end;
4310 #endif
4311 if (min_semi_end > str && start <= min_semi_end) {
4312 goto end_buf;
4315 else {
4316 min_semi_end = (UChar* )end;
4317 goto end_buf;
4320 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
4321 goto begin_position;
4324 else if (str == end) { /* empty string */
4325 static const UChar address_for_empty_string[] = "";
4327 #ifdef ONIG_DEBUG_SEARCH
4328 fprintf(stderr, "onig_search: empty string.\n");
4329 #endif
4331 if (reg->threshold_len == 0) {
4332 start = end = str = address_for_empty_string;
4333 s = (UChar* )start;
4334 prev = (UChar* )NULL;
4336 MATCH_ARG_INIT(msa, option, region, start, start);
4337 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4338 msa.state_check_buff = (void* )0;
4339 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
4340 #endif
4341 MATCH_AND_RETURN_CHECK(end);
4342 goto mismatch;
4344 goto mismatch_no_msa;
4347 #ifdef ONIG_DEBUG_SEARCH
4348 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4349 (int )(end - str), (int )(start - str), (int )(range - str));
4350 #endif
4352 MATCH_ARG_INIT(msa, option, region, start, global_pos);
4353 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4355 ptrdiff_t offset = (MIN(start, range) - str);
4356 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4358 #endif
4360 s = (UChar* )start;
4361 if (range > start) { /* forward search */
4362 if (s > str)
4363 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4364 else
4365 prev = (UChar* )NULL;
4367 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4368 UChar *sch_range, *low, *high, *low_prev;
4370 sch_range = (UChar* )range;
4371 if (reg->dmax != 0) {
4372 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4373 sch_range = (UChar* )end;
4374 else {
4375 sch_range += reg->dmax;
4376 if (sch_range > end) sch_range = (UChar* )end;
4380 if ((end - start) < reg->threshold_len)
4381 goto mismatch;
4383 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4384 do {
4385 if (! forward_search_range(reg, str, end, s, sch_range,
4386 &low, &high, &low_prev)) goto mismatch;
4387 if (s < low) {
4388 s = low;
4389 prev = low_prev;
4391 while (s <= high) {
4392 MATCH_AND_RETURN_CHECK(orig_range);
4393 prev = s;
4394 s += enclen(reg->enc, s, end);
4396 } while (s < range);
4397 goto mismatch;
4399 else { /* check only. */
4400 if (! forward_search_range(reg, str, end, s, sch_range,
4401 &low, &high, (UChar** )NULL)) goto mismatch;
4403 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
4404 do {
4405 MATCH_AND_RETURN_CHECK(orig_range);
4406 prev = s;
4407 s += enclen(reg->enc, s, end);
4409 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4410 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
4411 && s < range) {
4412 prev = s;
4413 s += enclen(reg->enc, s, end);
4416 } while (s < range);
4417 goto mismatch;
4422 do {
4423 MATCH_AND_RETURN_CHECK(orig_range);
4424 prev = s;
4425 s += enclen(reg->enc, s, end);
4426 } while (s < range);
4428 if (s == range) { /* because empty match with /$/. */
4429 MATCH_AND_RETURN_CHECK(orig_range);
4432 else { /* backward search */
4433 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4434 UChar *low, *high, *adjrange, *sch_start;
4436 if (range < end)
4437 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
4438 else
4439 adjrange = (UChar* )end;
4441 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
4442 (end - range) >= reg->threshold_len) {
4443 do {
4444 sch_start = s + reg->dmax;
4445 if (sch_start > end) sch_start = (UChar* )end;
4446 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4447 &low, &high) <= 0)
4448 goto mismatch;
4450 if (s > high)
4451 s = high;
4453 while (s >= low) {
4454 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4455 MATCH_AND_RETURN_CHECK(orig_start);
4456 s = prev;
4458 } while (s >= range);
4459 goto mismatch;
4461 else { /* check only. */
4462 if ((end - range) < reg->threshold_len) goto mismatch;
4464 sch_start = s;
4465 if (reg->dmax != 0) {
4466 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4467 sch_start = (UChar* )end;
4468 else {
4469 sch_start += reg->dmax;
4470 if (sch_start > end) sch_start = (UChar* )end;
4471 else
4472 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4473 start, sch_start, end);
4476 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4477 &low, &high) <= 0) goto mismatch;
4481 do {
4482 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4483 MATCH_AND_RETURN_CHECK(orig_start);
4484 s = prev;
4485 } while (s >= range);
4488 mismatch:
4489 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4490 if (IS_FIND_LONGEST(reg->options)) {
4491 if (msa.best_len >= 0) {
4492 s = msa.best_s;
4493 goto match;
4496 #endif
4497 r = ONIG_MISMATCH;
4499 finish:
4500 MATCH_ARG_FREE(msa);
4502 /* If result is mismatch and no FIND_NOT_EMPTY option,
4503 then the region is not set in match_at(). */
4504 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
4505 onig_region_clear(region);
4508 #ifdef ONIG_DEBUG
4509 if (r != ONIG_MISMATCH)
4510 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4511 #endif
4512 return r;
4514 mismatch_no_msa:
4515 r = ONIG_MISMATCH;
4516 finish_no_msa:
4517 #ifdef ONIG_DEBUG
4518 if (r != ONIG_MISMATCH)
4519 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4520 #endif
4521 return r;
4523 match:
4524 MATCH_ARG_FREE(msa);
4525 return s - str;
4528 extern OnigPosition
4529 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4530 OnigRegion* region, OnigOptionType option,
4531 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
4532 void* callback_arg)
4534 OnigPosition r;
4535 OnigPosition n;
4536 int rs;
4537 const UChar* start;
4539 n = 0;
4540 start = str;
4541 while (1) {
4542 r = onig_search(reg, str, end, start, end, region, option);
4543 if (r >= 0) {
4544 rs = scan_callback(n, r, region, callback_arg);
4545 n++;
4546 if (rs != 0)
4547 return rs;
4549 if (region->end[0] == start - str) {
4550 if (start >= end) break;
4551 start += enclen(reg->enc, start, end);
4553 else
4554 start = str + region->end[0];
4556 if (start > end)
4557 break;
4559 else if (r == ONIG_MISMATCH) {
4560 break;
4562 else { /* error */
4563 return r;
4567 return n;
4570 extern OnigEncoding
4571 onig_get_encoding(const regex_t* reg)
4573 return reg->enc;
4576 extern OnigOptionType
4577 onig_get_options(const regex_t* reg)
4579 return reg->options;
4582 extern OnigCaseFoldType
4583 onig_get_case_fold_flag(const regex_t* reg)
4585 return reg->case_fold_flag;
4588 extern const OnigSyntaxType*
4589 onig_get_syntax(const regex_t* reg)
4591 return reg->syntax;
4594 extern int
4595 onig_number_of_captures(const regex_t* reg)
4597 return reg->num_mem;
4600 extern int
4601 onig_number_of_capture_histories(const regex_t* reg)
4603 #ifdef USE_CAPTURE_HISTORY
4604 int i, n;
4606 n = 0;
4607 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4608 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
4609 n++;
4611 return n;
4612 #else
4613 return 0;
4614 #endif
4617 extern void
4618 onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
4620 *to = *from;