1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
20 static reg_errcode_t
match_ctx_init (re_match_context_t
*cache
, int eflags
,
22 static void match_ctx_clean (re_match_context_t
*mctx
);
23 static void match_ctx_free (re_match_context_t
*cache
);
24 static reg_errcode_t
match_ctx_add_entry (re_match_context_t
*cache
, Idx node
,
25 Idx str_idx
, Idx from
, Idx to
);
26 static Idx
search_cur_bkref_entry (const re_match_context_t
*mctx
, Idx str_idx
);
27 static reg_errcode_t
match_ctx_add_subtop (re_match_context_t
*mctx
, Idx node
,
29 static re_sub_match_last_t
* match_ctx_add_sublast (re_sub_match_top_t
*subtop
,
30 Idx node
, Idx str_idx
);
31 static void sift_ctx_init (re_sift_context_t
*sctx
, re_dfastate_t
**sifted_sts
,
32 re_dfastate_t
**limited_sts
, Idx last_node
,
34 static reg_errcode_t
re_search_internal (const regex_t
*preg
,
35 const char *string
, Idx length
,
36 Idx start
, Idx last_start
, Idx stop
,
37 size_t nmatch
, regmatch_t pmatch
[],
39 static regoff_t
re_search_2_stub (struct re_pattern_buffer
*bufp
,
40 const char *string1
, Idx length1
,
41 const char *string2
, Idx length2
,
42 Idx start
, regoff_t range
,
43 struct re_registers
*regs
,
44 Idx stop
, bool ret_len
);
45 static regoff_t
re_search_stub (struct re_pattern_buffer
*bufp
,
46 const char *string
, Idx length
, Idx start
,
47 regoff_t range
, Idx stop
,
48 struct re_registers
*regs
,
50 static unsigned re_copy_regs (struct re_registers
*regs
, regmatch_t
*pmatch
,
51 Idx nregs
, int regs_allocated
);
52 static reg_errcode_t
prune_impossible_nodes (re_match_context_t
*mctx
);
53 static Idx
check_matching (re_match_context_t
*mctx
, bool fl_longest_match
,
55 static Idx
check_halt_state_context (const re_match_context_t
*mctx
,
56 const re_dfastate_t
*state
, Idx idx
);
57 static void update_regs (const re_dfa_t
*dfa
, regmatch_t
*pmatch
,
58 regmatch_t
*prev_idx_match
, Idx cur_node
,
59 Idx cur_idx
, Idx nmatch
);
60 static reg_errcode_t
push_fail_stack (struct re_fail_stack_t
*fs
,
61 Idx str_idx
, Idx dest_node
, Idx nregs
,
63 re_node_set
*eps_via_nodes
);
64 static reg_errcode_t
set_regs (const regex_t
*preg
,
65 const re_match_context_t
*mctx
,
66 size_t nmatch
, regmatch_t
*pmatch
,
68 static reg_errcode_t
free_fail_stack_return (struct re_fail_stack_t
*fs
);
71 static int sift_states_iter_mb (const re_match_context_t
*mctx
,
72 re_sift_context_t
*sctx
,
73 Idx node_idx
, Idx str_idx
, Idx max_str_idx
);
74 #endif /* RE_ENABLE_I18N */
75 static reg_errcode_t
sift_states_backward (const re_match_context_t
*mctx
,
76 re_sift_context_t
*sctx
);
77 static reg_errcode_t
build_sifted_states (const re_match_context_t
*mctx
,
78 re_sift_context_t
*sctx
, Idx str_idx
,
79 re_node_set
*cur_dest
);
80 static reg_errcode_t
update_cur_sifted_state (const re_match_context_t
*mctx
,
81 re_sift_context_t
*sctx
,
83 re_node_set
*dest_nodes
);
84 static reg_errcode_t
add_epsilon_src_nodes (const re_dfa_t
*dfa
,
85 re_node_set
*dest_nodes
,
86 const re_node_set
*candidates
);
87 static bool check_dst_limits (const re_match_context_t
*mctx
,
88 const re_node_set
*limits
,
89 Idx dst_node
, Idx dst_idx
, Idx src_node
,
91 static int check_dst_limits_calc_pos_1 (const re_match_context_t
*mctx
,
92 int boundaries
, Idx subexp_idx
,
93 Idx from_node
, Idx bkref_idx
);
94 static int check_dst_limits_calc_pos (const re_match_context_t
*mctx
,
95 Idx limit
, Idx subexp_idx
,
96 Idx node
, Idx str_idx
,
98 static reg_errcode_t
check_subexp_limits (const re_dfa_t
*dfa
,
99 re_node_set
*dest_nodes
,
100 const re_node_set
*candidates
,
102 struct re_backref_cache_entry
*bkref_ents
,
104 static reg_errcode_t
sift_states_bkref (const re_match_context_t
*mctx
,
105 re_sift_context_t
*sctx
,
106 Idx str_idx
, const re_node_set
*candidates
);
107 static reg_errcode_t
merge_state_array (const re_dfa_t
*dfa
,
109 re_dfastate_t
**src
, Idx num
);
110 static re_dfastate_t
*find_recover_state (reg_errcode_t
*err
,
111 re_match_context_t
*mctx
);
112 static re_dfastate_t
*transit_state (reg_errcode_t
*err
,
113 re_match_context_t
*mctx
,
114 re_dfastate_t
*state
);
115 static re_dfastate_t
*merge_state_with_log (reg_errcode_t
*err
,
116 re_match_context_t
*mctx
,
117 re_dfastate_t
*next_state
);
118 static reg_errcode_t
check_subexp_matching_top (re_match_context_t
*mctx
,
119 re_node_set
*cur_nodes
,
122 static re_dfastate_t
*transit_state_sb (reg_errcode_t
*err
,
123 re_match_context_t
*mctx
,
124 re_dfastate_t
*pstate
);
126 #ifdef RE_ENABLE_I18N
127 static reg_errcode_t
transit_state_mb (re_match_context_t
*mctx
,
128 re_dfastate_t
*pstate
);
129 #endif /* RE_ENABLE_I18N */
130 static reg_errcode_t
transit_state_bkref (re_match_context_t
*mctx
,
131 const re_node_set
*nodes
);
132 static reg_errcode_t
get_subexp (re_match_context_t
*mctx
,
133 Idx bkref_node
, Idx bkref_str_idx
);
134 static reg_errcode_t
get_subexp_sub (re_match_context_t
*mctx
,
135 const re_sub_match_top_t
*sub_top
,
136 re_sub_match_last_t
*sub_last
,
137 Idx bkref_node
, Idx bkref_str
);
138 static Idx
find_subexp_node (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
139 Idx subexp_idx
, int type
);
140 static reg_errcode_t
check_arrival (re_match_context_t
*mctx
,
141 state_array_t
*path
, Idx top_node
,
142 Idx top_str
, Idx last_node
, Idx last_str
,
144 static reg_errcode_t
check_arrival_add_next_nodes (re_match_context_t
*mctx
,
146 re_node_set
*cur_nodes
,
147 re_node_set
*next_nodes
);
148 static reg_errcode_t
check_arrival_expand_ecl (const re_dfa_t
*dfa
,
149 re_node_set
*cur_nodes
,
150 Idx ex_subexp
, int type
);
151 static reg_errcode_t
check_arrival_expand_ecl_sub (const re_dfa_t
*dfa
,
152 re_node_set
*dst_nodes
,
153 Idx target
, Idx ex_subexp
,
155 static reg_errcode_t
expand_bkref_cache (re_match_context_t
*mctx
,
156 re_node_set
*cur_nodes
, Idx cur_str
,
157 Idx subexp_num
, int type
);
158 static bool build_trtable (const re_dfa_t
*dfa
, re_dfastate_t
*state
);
159 #ifdef RE_ENABLE_I18N
160 static int check_node_accept_bytes (const re_dfa_t
*dfa
, Idx node_idx
,
161 const re_string_t
*input
, Idx idx
);
163 static unsigned int find_collation_sequence_value (const unsigned char *mbs
,
166 #endif /* RE_ENABLE_I18N */
167 static Idx
group_nodes_into_DFAstates (const re_dfa_t
*dfa
,
168 const re_dfastate_t
*state
,
169 re_node_set
*states_node
,
170 bitset_t
*states_ch
);
171 static bool check_node_accept (const re_match_context_t
*mctx
,
172 const re_token_t
*node
, Idx idx
);
173 static reg_errcode_t
extend_buffers (re_match_context_t
*mctx
, int min_len
);
175 /* Entry point for POSIX code. */
177 /* regexec searches for a given pattern, specified by PREG, in the
180 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
182 least NMATCH elements, and we set them to the offsets of the
183 corresponding matched substrings.
185 EFLAGS specifies "execution flags" which affect matching: if
186 REG_NOTBOL is set, then ^ does not match at the beginning of the
187 string; if REG_NOTEOL is set, then $ does not match at the end.
189 We return 0 if we find a match and REG_NOMATCH if not. */
192 regexec (const regex_t
*_Restrict_ preg
, const char *_Restrict_ string
,
193 size_t nmatch
, regmatch_t pmatch
[], int eflags
)
197 re_dfa_t
*dfa
= preg
->buffer
;
199 if (eflags
& ~(REG_NOTBOL
| REG_NOTEOL
| REG_STARTEND
))
202 if (eflags
& REG_STARTEND
)
204 start
= pmatch
[0].rm_so
;
205 length
= pmatch
[0].rm_eo
;
210 length
= strlen (string
);
213 lock_lock (dfa
->lock
);
215 err
= re_search_internal (preg
, string
, length
, start
, length
,
216 length
, 0, NULL
, eflags
);
218 err
= re_search_internal (preg
, string
, length
, start
, length
,
219 length
, nmatch
, pmatch
, eflags
);
220 lock_unlock (dfa
->lock
);
221 return err
!= REG_NOERROR
;
225 libc_hidden_def (__regexec
)
227 # include <shlib-compat.h>
228 versioned_symbol (libc
, __regexec
, regexec
, GLIBC_2_3_4
);
230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231 __typeof__ (__regexec
) __compat_regexec
;
234 attribute_compat_text_section
235 __compat_regexec (const regex_t
*_Restrict_ preg
,
236 const char *_Restrict_ string
, size_t nmatch
,
237 regmatch_t pmatch
[], int eflags
)
239 return regexec (preg
, string
, nmatch
, pmatch
,
240 eflags
& (REG_NOTBOL
| REG_NOTEOL
));
242 compat_symbol (libc
, __compat_regexec
, regexec
, GLIBC_2_0
);
246 /* Entry points for GNU code. */
248 /* re_match, re_search, re_match_2, re_search_2
250 The former two functions operate on STRING with length LENGTH,
251 while the later two operate on concatenation of STRING1 and STRING2
252 with lengths LENGTH1 and LENGTH2, respectively.
254 re_match() matches the compiled pattern in BUFP against the string,
255 starting at index START.
257 re_search() first tries matching at index START, then it tries to match
258 starting from index START + 1, and so on. The last start position tried
259 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
262 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263 the first STOP characters of the concatenation of the strings should be
266 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267 and all groups is stored in REGS. (For the "_2" variants, the offsets are
268 computed relative to the concatenation, not relative to the individual
271 On success, re_match* functions return the length of the match, re_search*
272 return the position of the start of the match. Return value -1 means no
273 match was found and -2 indicates an internal error. */
276 re_match (struct re_pattern_buffer
*bufp
, const char *string
, Idx length
,
277 Idx start
, struct re_registers
*regs
)
279 return re_search_stub (bufp
, string
, length
, start
, 0, length
, regs
, true);
282 weak_alias (__re_match
, re_match
)
286 re_search (struct re_pattern_buffer
*bufp
, const char *string
, Idx length
,
287 Idx start
, regoff_t range
, struct re_registers
*regs
)
289 return re_search_stub (bufp
, string
, length
, start
, range
, length
, regs
,
293 weak_alias (__re_search
, re_search
)
297 re_match_2 (struct re_pattern_buffer
*bufp
, const char *string1
, Idx length1
,
298 const char *string2
, Idx length2
, Idx start
,
299 struct re_registers
*regs
, Idx stop
)
301 return re_search_2_stub (bufp
, string1
, length1
, string2
, length2
,
302 start
, 0, regs
, stop
, true);
305 weak_alias (__re_match_2
, re_match_2
)
309 re_search_2 (struct re_pattern_buffer
*bufp
, const char *string1
, Idx length1
,
310 const char *string2
, Idx length2
, Idx start
, regoff_t range
,
311 struct re_registers
*regs
, Idx stop
)
313 return re_search_2_stub (bufp
, string1
, length1
, string2
, length2
,
314 start
, range
, regs
, stop
, false);
317 weak_alias (__re_search_2
, re_search_2
)
321 re_search_2_stub (struct re_pattern_buffer
*bufp
, const char *string1
,
322 Idx length1
, const char *string2
, Idx length2
, Idx start
,
323 regoff_t range
, struct re_registers
*regs
,
324 Idx stop
, bool ret_len
)
331 if (BE ((length1
< 0 || length2
< 0 || stop
< 0
332 || INT_ADD_WRAPV (length1
, length2
, &len
)),
336 /* Concatenate the strings. */
340 s
= re_malloc (char, len
);
342 if (BE (s
== NULL
, 0))
345 memcpy (__mempcpy (s
, string1
, length1
), string2
, length2
);
347 memcpy (s
, string1
, length1
);
348 memcpy (s
+ length1
, string2
, length2
);
357 rval
= re_search_stub (bufp
, str
, len
, start
, range
, stop
, regs
,
363 /* The parameters have the same meaning as those of re_search.
364 Additional parameters:
365 If RET_LEN is true the length of the match is returned (re_match style);
366 otherwise the position of the match is returned. */
369 re_search_stub (struct re_pattern_buffer
*bufp
, const char *string
, Idx length
,
370 Idx start
, regoff_t range
, Idx stop
, struct re_registers
*regs
,
373 reg_errcode_t result
;
378 re_dfa_t
*dfa
= bufp
->buffer
;
379 Idx last_start
= start
+ range
;
381 /* Check for out-of-range. */
382 if (BE (start
< 0 || start
> length
, 0))
384 if (BE (length
< last_start
|| (0 <= range
&& last_start
< start
), 0))
386 else if (BE (last_start
< 0 || (range
< 0 && start
<= last_start
), 0))
389 lock_lock (dfa
->lock
);
391 eflags
|= (bufp
->not_bol
) ? REG_NOTBOL
: 0;
392 eflags
|= (bufp
->not_eol
) ? REG_NOTEOL
: 0;
394 /* Compile fastmap if we haven't yet. */
395 if (start
< last_start
&& bufp
->fastmap
!= NULL
&& !bufp
->fastmap_accurate
)
396 re_compile_fastmap (bufp
);
398 if (BE (bufp
->no_sub
, 0))
401 /* We need at least 1 register. */
404 else if (BE (bufp
->regs_allocated
== REGS_FIXED
405 && regs
->num_regs
<= bufp
->re_nsub
, 0))
407 nregs
= regs
->num_regs
;
408 if (BE (nregs
< 1, 0))
410 /* Nothing can be copied to regs. */
416 nregs
= bufp
->re_nsub
+ 1;
417 pmatch
= re_malloc (regmatch_t
, nregs
);
418 if (BE (pmatch
== NULL
, 0))
424 result
= re_search_internal (bufp
, string
, length
, start
, last_start
, stop
,
425 nregs
, pmatch
, eflags
);
429 /* I hope we needn't fill their regs with -1's when no match was found. */
430 if (result
!= REG_NOERROR
)
431 rval
= result
== REG_NOMATCH
? -1 : -2;
432 else if (regs
!= NULL
)
434 /* If caller wants register contents data back, copy them. */
435 bufp
->regs_allocated
= re_copy_regs (regs
, pmatch
, nregs
,
436 bufp
->regs_allocated
);
437 if (BE (bufp
->regs_allocated
== REGS_UNALLOCATED
, 0))
441 if (BE (rval
== 0, 1))
445 assert (pmatch
[0].rm_so
== start
);
446 rval
= pmatch
[0].rm_eo
- start
;
449 rval
= pmatch
[0].rm_so
;
453 lock_unlock (dfa
->lock
);
458 re_copy_regs (struct re_registers
*regs
, regmatch_t
*pmatch
, Idx nregs
,
461 int rval
= REGS_REALLOCATE
;
463 Idx need_regs
= nregs
+ 1;
464 /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
467 /* Have the register data arrays been allocated? */
468 if (regs_allocated
== REGS_UNALLOCATED
)
469 { /* No. So allocate them with malloc. */
470 regs
->start
= re_malloc (regoff_t
, need_regs
);
471 if (BE (regs
->start
== NULL
, 0))
472 return REGS_UNALLOCATED
;
473 regs
->end
= re_malloc (regoff_t
, need_regs
);
474 if (BE (regs
->end
== NULL
, 0))
476 re_free (regs
->start
);
477 return REGS_UNALLOCATED
;
479 regs
->num_regs
= need_regs
;
481 else if (regs_allocated
== REGS_REALLOCATE
)
482 { /* Yes. If we need more elements than were already
483 allocated, reallocate them. If we need fewer, just
485 if (BE (need_regs
> regs
->num_regs
, 0))
487 regoff_t
*new_start
= re_realloc (regs
->start
, regoff_t
, need_regs
);
489 if (BE (new_start
== NULL
, 0))
490 return REGS_UNALLOCATED
;
491 new_end
= re_realloc (regs
->end
, regoff_t
, need_regs
);
492 if (BE (new_end
== NULL
, 0))
495 return REGS_UNALLOCATED
;
497 regs
->start
= new_start
;
499 regs
->num_regs
= need_regs
;
504 assert (regs_allocated
== REGS_FIXED
);
505 /* This function may not be called with REGS_FIXED and nregs too big. */
506 assert (regs
->num_regs
>= nregs
);
511 for (i
= 0; i
< nregs
; ++i
)
513 regs
->start
[i
] = pmatch
[i
].rm_so
;
514 regs
->end
[i
] = pmatch
[i
].rm_eo
;
516 for ( ; i
< regs
->num_regs
; ++i
)
517 regs
->start
[i
] = regs
->end
[i
] = -1;
522 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
523 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
524 this memory for recording register information. STARTS and ENDS
525 must be allocated using the malloc library routine, and must each
526 be at least NUM_REGS * sizeof (regoff_t) bytes long.
528 If NUM_REGS == 0, then subsequent matches should allocate their own
531 Unless this function is called, the first search or match using
532 PATTERN_BUFFER will allocate its own register data, without
533 freeing the old data. */
536 re_set_registers (struct re_pattern_buffer
*bufp
, struct re_registers
*regs
,
537 __re_size_t num_regs
, regoff_t
*starts
, regoff_t
*ends
)
541 bufp
->regs_allocated
= REGS_REALLOCATE
;
542 regs
->num_regs
= num_regs
;
543 regs
->start
= starts
;
548 bufp
->regs_allocated
= REGS_UNALLOCATED
;
550 regs
->start
= regs
->end
= NULL
;
554 weak_alias (__re_set_registers
, re_set_registers
)
557 /* Entry points compatible with 4.2 BSD regex library. We don't define
558 them unless specifically requested. */
560 #if defined _REGEX_RE_COMP || defined _LIBC
565 re_exec (const char *s
)
567 return 0 == regexec (&re_comp_buf
, s
, 0, NULL
, 0);
569 #endif /* _REGEX_RE_COMP */
571 /* Internal entry point. */
573 /* Searches for a compiled pattern PREG in the string STRING, whose
574 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
575 meaning as with regexec. LAST_START is START + RANGE, where
576 START and RANGE have the same meaning as with re_search.
577 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
578 otherwise return the error code.
579 Note: We assume front end functions already check ranges.
580 (0 <= LAST_START && LAST_START <= LENGTH) */
583 __attribute_warn_unused_result__
584 re_search_internal (const regex_t
*preg
, const char *string
, Idx length
,
585 Idx start
, Idx last_start
, Idx stop
, size_t nmatch
,
586 regmatch_t pmatch
[], int eflags
)
589 const re_dfa_t
*dfa
= preg
->buffer
;
590 Idx left_lim
, right_lim
;
592 bool fl_longest_match
;
599 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
600 re_match_context_t mctx
= { .dfa
= dfa
};
602 re_match_context_t mctx
;
604 char *fastmap
= ((preg
->fastmap
!= NULL
&& preg
->fastmap_accurate
605 && start
!= last_start
&& !preg
->can_be_null
)
606 ? preg
->fastmap
: NULL
);
607 RE_TRANSLATE_TYPE t
= preg
->translate
;
609 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
610 memset (&mctx
, '\0', sizeof (re_match_context_t
));
614 extra_nmatch
= (nmatch
> preg
->re_nsub
) ? nmatch
- (preg
->re_nsub
+ 1) : 0;
615 nmatch
-= extra_nmatch
;
617 /* Check if the DFA haven't been compiled. */
618 if (BE (preg
->used
== 0 || dfa
->init_state
== NULL
619 || dfa
->init_state_word
== NULL
|| dfa
->init_state_nl
== NULL
620 || dfa
->init_state_begbuf
== NULL
, 0))
624 /* We assume front-end functions already check them. */
625 assert (0 <= last_start
&& last_start
<= length
);
628 /* If initial states with non-begbuf contexts have no elements,
629 the regex must be anchored. If preg->newline_anchor is set,
630 we'll never use init_state_nl, so do not check it. */
631 if (dfa
->init_state
->nodes
.nelem
== 0
632 && dfa
->init_state_word
->nodes
.nelem
== 0
633 && (dfa
->init_state_nl
->nodes
.nelem
== 0
634 || !preg
->newline_anchor
))
636 if (start
!= 0 && last_start
!= 0)
638 start
= last_start
= 0;
641 /* We must check the longest matching, if nmatch > 0. */
642 fl_longest_match
= (nmatch
!= 0 || dfa
->nbackref
);
644 err
= re_string_allocate (&mctx
.input
, string
, length
, dfa
->nodes_len
+ 1,
645 preg
->translate
, (preg
->syntax
& RE_ICASE
) != 0,
647 if (BE (err
!= REG_NOERROR
, 0))
649 mctx
.input
.stop
= stop
;
650 mctx
.input
.raw_stop
= stop
;
651 mctx
.input
.newline_anchor
= preg
->newline_anchor
;
653 err
= match_ctx_init (&mctx
, eflags
, dfa
->nbackref
* 2);
654 if (BE (err
!= REG_NOERROR
, 0))
657 /* We will log all the DFA states through which the dfa pass,
658 if nmatch > 1, or this dfa has "multibyte node", which is a
659 back-reference or a node which can accept multibyte character or
660 multi character collating element. */
661 if (nmatch
> 1 || dfa
->has_mb_node
)
663 /* Avoid overflow. */
664 if (BE ((MIN (IDX_MAX
, SIZE_MAX
/ sizeof (re_dfastate_t
*))
665 <= mctx
.input
.bufs_len
), 0))
671 mctx
.state_log
= re_malloc (re_dfastate_t
*, mctx
.input
.bufs_len
+ 1);
672 if (BE (mctx
.state_log
== NULL
, 0))
679 mctx
.state_log
= NULL
;
682 mctx
.input
.tip_context
= (eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
683 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
;
685 /* Check incrementally whether the input string matches. */
686 incr
= (last_start
< start
) ? -1 : 1;
687 left_lim
= (last_start
< start
) ? last_start
: start
;
688 right_lim
= (last_start
< start
) ? start
: last_start
;
689 sb
= dfa
->mb_cur_max
== 1;
692 ? ((sb
|| !(preg
->syntax
& RE_ICASE
|| t
) ? 4 : 0)
693 | (start
<= last_start
? 2 : 0)
694 | (t
!= NULL
? 1 : 0))
697 for (;; match_first
+= incr
)
700 if (match_first
< left_lim
|| right_lim
< match_first
)
703 /* Advance as rapidly as possible through the string, until we
704 find a plausible place to start matching. This may be done
705 with varying efficiency, so there are various possibilities:
706 only the most common of them are specialized, in order to
707 save on code size. We use a switch statement for speed. */
715 /* Fastmap with single-byte translation, match forward. */
716 while (BE (match_first
< right_lim
, 1)
717 && !fastmap
[t
[(unsigned char) string
[match_first
]]])
719 goto forward_match_found_start_or_reached_end
;
722 /* Fastmap without translation, match forward. */
723 while (BE (match_first
< right_lim
, 1)
724 && !fastmap
[(unsigned char) string
[match_first
]])
727 forward_match_found_start_or_reached_end
:
728 if (BE (match_first
== right_lim
, 0))
730 ch
= match_first
>= length
731 ? 0 : (unsigned char) string
[match_first
];
732 if (!fastmap
[t
? t
[ch
] : ch
])
739 /* Fastmap without multi-byte translation, match backwards. */
740 while (match_first
>= left_lim
)
742 ch
= match_first
>= length
743 ? 0 : (unsigned char) string
[match_first
];
744 if (fastmap
[t
? t
[ch
] : ch
])
748 if (match_first
< left_lim
)
753 /* In this case, we can't determine easily the current byte,
754 since it might be a component byte of a multibyte
755 character. Then we use the constructed buffer instead. */
758 /* If MATCH_FIRST is out of the valid range, reconstruct the
760 __re_size_t offset
= match_first
- mctx
.input
.raw_mbs_idx
;
761 if (BE (offset
>= (__re_size_t
) mctx
.input
.valid_raw_len
, 0))
763 err
= re_string_reconstruct (&mctx
.input
, match_first
,
765 if (BE (err
!= REG_NOERROR
, 0))
768 offset
= match_first
- mctx
.input
.raw_mbs_idx
;
770 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
771 Note that MATCH_FIRST must not be smaller than 0. */
772 ch
= (match_first
>= length
773 ? 0 : re_string_byte_at (&mctx
.input
, offset
));
777 if (match_first
< left_lim
|| match_first
> right_lim
)
786 /* Reconstruct the buffers so that the matcher can assume that
787 the matching starts from the beginning of the buffer. */
788 err
= re_string_reconstruct (&mctx
.input
, match_first
, eflags
);
789 if (BE (err
!= REG_NOERROR
, 0))
792 #ifdef RE_ENABLE_I18N
793 /* Don't consider this char as a possible match start if it part,
794 yet isn't the head, of a multibyte character. */
795 if (!sb
&& !re_string_first_byte (&mctx
.input
, 0))
799 /* It seems to be appropriate one, then use the matcher. */
800 /* We assume that the matching starts from 0. */
801 mctx
.state_log_top
= mctx
.nbkref_ents
= mctx
.max_mb_elem_len
= 0;
802 match_last
= check_matching (&mctx
, fl_longest_match
,
803 start
<= last_start
? &match_first
: NULL
);
804 if (match_last
!= -1)
806 if (BE (match_last
== -2, 0))
813 mctx
.match_last
= match_last
;
814 if ((!preg
->no_sub
&& nmatch
> 1) || dfa
->nbackref
)
816 re_dfastate_t
*pstate
= mctx
.state_log
[match_last
];
817 mctx
.last_node
= check_halt_state_context (&mctx
, pstate
,
820 if ((!preg
->no_sub
&& nmatch
> 1 && dfa
->has_plural_match
)
823 err
= prune_impossible_nodes (&mctx
);
824 if (err
== REG_NOERROR
)
826 if (BE (err
!= REG_NOMATCH
, 0))
831 break; /* We found a match. */
835 match_ctx_clean (&mctx
);
839 assert (match_last
!= -1);
840 assert (err
== REG_NOERROR
);
843 /* Set pmatch[] if we need. */
848 /* Initialize registers. */
849 for (reg_idx
= 1; reg_idx
< nmatch
; ++reg_idx
)
850 pmatch
[reg_idx
].rm_so
= pmatch
[reg_idx
].rm_eo
= -1;
852 /* Set the points where matching start/end. */
854 pmatch
[0].rm_eo
= mctx
.match_last
;
855 /* FIXME: This function should fail if mctx.match_last exceeds
856 the maximum possible regoff_t value. We need a new error
857 code REG_OVERFLOW. */
859 if (!preg
->no_sub
&& nmatch
> 1)
861 err
= set_regs (preg
, &mctx
, nmatch
, pmatch
,
862 dfa
->has_plural_match
&& dfa
->nbackref
> 0);
863 if (BE (err
!= REG_NOERROR
, 0))
867 /* At last, add the offset to each register, since we slid
868 the buffers so that we could assume that the matching starts
870 for (reg_idx
= 0; reg_idx
< nmatch
; ++reg_idx
)
871 if (pmatch
[reg_idx
].rm_so
!= -1)
873 #ifdef RE_ENABLE_I18N
874 if (BE (mctx
.input
.offsets_needed
!= 0, 0))
876 pmatch
[reg_idx
].rm_so
=
877 (pmatch
[reg_idx
].rm_so
== mctx
.input
.valid_len
878 ? mctx
.input
.valid_raw_len
879 : mctx
.input
.offsets
[pmatch
[reg_idx
].rm_so
]);
880 pmatch
[reg_idx
].rm_eo
=
881 (pmatch
[reg_idx
].rm_eo
== mctx
.input
.valid_len
882 ? mctx
.input
.valid_raw_len
883 : mctx
.input
.offsets
[pmatch
[reg_idx
].rm_eo
]);
886 assert (mctx
.input
.offsets_needed
== 0);
888 pmatch
[reg_idx
].rm_so
+= match_first
;
889 pmatch
[reg_idx
].rm_eo
+= match_first
;
891 for (reg_idx
= 0; reg_idx
< extra_nmatch
; ++reg_idx
)
893 pmatch
[nmatch
+ reg_idx
].rm_so
= -1;
894 pmatch
[nmatch
+ reg_idx
].rm_eo
= -1;
898 for (reg_idx
= 0; reg_idx
+ 1 < nmatch
; reg_idx
++)
899 if (dfa
->subexp_map
[reg_idx
] != reg_idx
)
901 pmatch
[reg_idx
+ 1].rm_so
902 = pmatch
[dfa
->subexp_map
[reg_idx
] + 1].rm_so
;
903 pmatch
[reg_idx
+ 1].rm_eo
904 = pmatch
[dfa
->subexp_map
[reg_idx
] + 1].rm_eo
;
909 re_free (mctx
.state_log
);
911 match_ctx_free (&mctx
);
912 re_string_destruct (&mctx
.input
);
917 __attribute_warn_unused_result__
918 prune_impossible_nodes (re_match_context_t
*mctx
)
920 const re_dfa_t
*const dfa
= mctx
->dfa
;
921 Idx halt_node
, match_last
;
923 re_dfastate_t
**sifted_states
;
924 re_dfastate_t
**lim_states
= NULL
;
925 re_sift_context_t sctx
;
927 assert (mctx
->state_log
!= NULL
);
929 match_last
= mctx
->match_last
;
930 halt_node
= mctx
->last_node
;
932 /* Avoid overflow. */
933 if (BE (MIN (IDX_MAX
, SIZE_MAX
/ sizeof (re_dfastate_t
*)) <= match_last
, 0))
936 sifted_states
= re_malloc (re_dfastate_t
*, match_last
+ 1);
937 if (BE (sifted_states
== NULL
, 0))
944 lim_states
= re_malloc (re_dfastate_t
*, match_last
+ 1);
945 if (BE (lim_states
== NULL
, 0))
952 memset (lim_states
, '\0',
953 sizeof (re_dfastate_t
*) * (match_last
+ 1));
954 sift_ctx_init (&sctx
, sifted_states
, lim_states
, halt_node
,
956 ret
= sift_states_backward (mctx
, &sctx
);
957 re_node_set_free (&sctx
.limits
);
958 if (BE (ret
!= REG_NOERROR
, 0))
960 if (sifted_states
[0] != NULL
|| lim_states
[0] != NULL
)
970 } while (mctx
->state_log
[match_last
] == NULL
971 || !mctx
->state_log
[match_last
]->halt
);
972 halt_node
= check_halt_state_context (mctx
,
973 mctx
->state_log
[match_last
],
976 ret
= merge_state_array (dfa
, sifted_states
, lim_states
,
978 re_free (lim_states
);
980 if (BE (ret
!= REG_NOERROR
, 0))
985 sift_ctx_init (&sctx
, sifted_states
, lim_states
, halt_node
, match_last
);
986 ret
= sift_states_backward (mctx
, &sctx
);
987 re_node_set_free (&sctx
.limits
);
988 if (BE (ret
!= REG_NOERROR
, 0))
990 if (sifted_states
[0] == NULL
)
996 re_free (mctx
->state_log
);
997 mctx
->state_log
= sifted_states
;
998 sifted_states
= NULL
;
999 mctx
->last_node
= halt_node
;
1000 mctx
->match_last
= match_last
;
1003 re_free (sifted_states
);
1004 re_free (lim_states
);
1008 /* Acquire an initial state and return it.
1009 We must select appropriate initial state depending on the context,
1010 since initial states may have constraints like "\<", "^", etc.. */
1012 static inline re_dfastate_t
*
1013 __attribute__ ((always_inline
))
1014 acquire_init_state_context (reg_errcode_t
*err
, const re_match_context_t
*mctx
,
1017 const re_dfa_t
*const dfa
= mctx
->dfa
;
1018 if (dfa
->init_state
->has_constraint
)
1020 unsigned int context
;
1021 context
= re_string_context_at (&mctx
->input
, idx
- 1, mctx
->eflags
);
1022 if (IS_WORD_CONTEXT (context
))
1023 return dfa
->init_state_word
;
1024 else if (IS_ORDINARY_CONTEXT (context
))
1025 return dfa
->init_state
;
1026 else if (IS_BEGBUF_CONTEXT (context
) && IS_NEWLINE_CONTEXT (context
))
1027 return dfa
->init_state_begbuf
;
1028 else if (IS_NEWLINE_CONTEXT (context
))
1029 return dfa
->init_state_nl
;
1030 else if (IS_BEGBUF_CONTEXT (context
))
1032 /* It is relatively rare case, then calculate on demand. */
1033 return re_acquire_state_context (err
, dfa
,
1034 dfa
->init_state
->entrance_nodes
,
1038 /* Must not happen? */
1039 return dfa
->init_state
;
1042 return dfa
->init_state
;
1045 /* Check whether the regular expression match input string INPUT or not,
1046 and return the index where the matching end. Return -1 if
1047 there is no match, and return -2 in case of an error.
1048 FL_LONGEST_MATCH means we want the POSIX longest matching.
1049 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1050 next place where we may want to try matching.
1051 Note that the matcher assumes that the matching starts from the current
1052 index of the buffer. */
1055 __attribute_warn_unused_result__
1056 check_matching (re_match_context_t
*mctx
, bool fl_longest_match
,
1059 const re_dfa_t
*const dfa
= mctx
->dfa
;
1062 Idx match_last
= -1;
1063 Idx cur_str_idx
= re_string_cur_idx (&mctx
->input
);
1064 re_dfastate_t
*cur_state
;
1065 bool at_init_state
= p_match_first
!= NULL
;
1066 Idx next_start_idx
= cur_str_idx
;
1069 cur_state
= acquire_init_state_context (&err
, mctx
, cur_str_idx
);
1070 /* An initial state must not be NULL (invalid). */
1071 if (BE (cur_state
== NULL
, 0))
1073 assert (err
== REG_ESPACE
);
1077 if (mctx
->state_log
!= NULL
)
1079 mctx
->state_log
[cur_str_idx
] = cur_state
;
1081 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1082 later. E.g. Processing back references. */
1083 if (BE (dfa
->nbackref
, 0))
1085 at_init_state
= false;
1086 err
= check_subexp_matching_top (mctx
, &cur_state
->nodes
, 0);
1087 if (BE (err
!= REG_NOERROR
, 0))
1090 if (cur_state
->has_backref
)
1092 err
= transit_state_bkref (mctx
, &cur_state
->nodes
);
1093 if (BE (err
!= REG_NOERROR
, 0))
1099 /* If the RE accepts NULL string. */
1100 if (BE (cur_state
->halt
, 0))
1102 if (!cur_state
->has_constraint
1103 || check_halt_state_context (mctx
, cur_state
, cur_str_idx
))
1105 if (!fl_longest_match
)
1109 match_last
= cur_str_idx
;
1115 while (!re_string_eoi (&mctx
->input
))
1117 re_dfastate_t
*old_state
= cur_state
;
1118 Idx next_char_idx
= re_string_cur_idx (&mctx
->input
) + 1;
1120 if ((BE (next_char_idx
>= mctx
->input
.bufs_len
, 0)
1121 && mctx
->input
.bufs_len
< mctx
->input
.len
)
1122 || (BE (next_char_idx
>= mctx
->input
.valid_len
, 0)
1123 && mctx
->input
.valid_len
< mctx
->input
.len
))
1125 err
= extend_buffers (mctx
, next_char_idx
+ 1);
1126 if (BE (err
!= REG_NOERROR
, 0))
1128 assert (err
== REG_ESPACE
);
1133 cur_state
= transit_state (&err
, mctx
, cur_state
);
1134 if (mctx
->state_log
!= NULL
)
1135 cur_state
= merge_state_with_log (&err
, mctx
, cur_state
);
1137 if (cur_state
== NULL
)
1139 /* Reached the invalid state or an error. Try to recover a valid
1140 state using the state log, if available and if we have not
1141 already found a valid (even if not the longest) match. */
1142 if (BE (err
!= REG_NOERROR
, 0))
1145 if (mctx
->state_log
== NULL
1146 || (match
&& !fl_longest_match
)
1147 || (cur_state
= find_recover_state (&err
, mctx
)) == NULL
)
1151 if (BE (at_init_state
, 0))
1153 if (old_state
== cur_state
)
1154 next_start_idx
= next_char_idx
;
1156 at_init_state
= false;
1159 if (cur_state
->halt
)
1161 /* Reached a halt state.
1162 Check the halt state can satisfy the current context. */
1163 if (!cur_state
->has_constraint
1164 || check_halt_state_context (mctx
, cur_state
,
1165 re_string_cur_idx (&mctx
->input
)))
1167 /* We found an appropriate halt state. */
1168 match_last
= re_string_cur_idx (&mctx
->input
);
1171 /* We found a match, do not modify match_first below. */
1172 p_match_first
= NULL
;
1173 if (!fl_longest_match
)
1180 *p_match_first
+= next_start_idx
;
1185 /* Check NODE match the current context. */
1188 check_halt_node_context (const re_dfa_t
*dfa
, Idx node
, unsigned int context
)
1190 re_token_type_t type
= dfa
->nodes
[node
].type
;
1191 unsigned int constraint
= dfa
->nodes
[node
].constraint
;
1192 if (type
!= END_OF_RE
)
1196 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint
, context
))
1201 /* Check the halt state STATE match the current context.
1202 Return 0 if not match, if the node, STATE has, is a halt node and
1203 match the context, return the node. */
1206 check_halt_state_context (const re_match_context_t
*mctx
,
1207 const re_dfastate_t
*state
, Idx idx
)
1210 unsigned int context
;
1212 assert (state
->halt
);
1214 context
= re_string_context_at (&mctx
->input
, idx
, mctx
->eflags
);
1215 for (i
= 0; i
< state
->nodes
.nelem
; ++i
)
1216 if (check_halt_node_context (mctx
->dfa
, state
->nodes
.elems
[i
], context
))
1217 return state
->nodes
.elems
[i
];
1221 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1222 corresponding to the DFA).
1223 Return the destination node, and update EPS_VIA_NODES;
1224 return -1 in case of errors. */
1227 proceed_next_node (const re_match_context_t
*mctx
, Idx nregs
, regmatch_t
*regs
,
1228 Idx
*pidx
, Idx node
, re_node_set
*eps_via_nodes
,
1229 struct re_fail_stack_t
*fs
)
1231 const re_dfa_t
*const dfa
= mctx
->dfa
;
1234 if (IS_EPSILON_NODE (dfa
->nodes
[node
].type
))
1236 re_node_set
*cur_nodes
= &mctx
->state_log
[*pidx
]->nodes
;
1237 re_node_set
*edests
= &dfa
->edests
[node
];
1239 ok
= re_node_set_insert (eps_via_nodes
, node
);
1242 /* Pick up a valid destination, or return -1 if none
1244 for (dest_node
= -1, i
= 0; i
< edests
->nelem
; ++i
)
1246 Idx candidate
= edests
->elems
[i
];
1247 if (!re_node_set_contains (cur_nodes
, candidate
))
1249 if (dest_node
== -1)
1250 dest_node
= candidate
;
1254 /* In order to avoid infinite loop like "(a*)*", return the second
1255 epsilon-transition if the first was already considered. */
1256 if (re_node_set_contains (eps_via_nodes
, dest_node
))
1259 /* Otherwise, push the second epsilon-transition on the fail stack. */
1261 && push_fail_stack (fs
, *pidx
, candidate
, nregs
, regs
,
1265 /* We know we are going to exit. */
1274 re_token_type_t type
= dfa
->nodes
[node
].type
;
1276 #ifdef RE_ENABLE_I18N
1277 if (dfa
->nodes
[node
].accept_mb
)
1278 naccepted
= check_node_accept_bytes (dfa
, node
, &mctx
->input
, *pidx
);
1280 #endif /* RE_ENABLE_I18N */
1281 if (type
== OP_BACK_REF
)
1283 Idx subexp_idx
= dfa
->nodes
[node
].opr
.idx
+ 1;
1284 naccepted
= regs
[subexp_idx
].rm_eo
- regs
[subexp_idx
].rm_so
;
1287 if (regs
[subexp_idx
].rm_so
== -1 || regs
[subexp_idx
].rm_eo
== -1)
1291 char *buf
= (char *) re_string_get_buffer (&mctx
->input
);
1292 if (memcmp (buf
+ regs
[subexp_idx
].rm_so
, buf
+ *pidx
,
1301 ok
= re_node_set_insert (eps_via_nodes
, node
);
1304 dest_node
= dfa
->edests
[node
].elems
[0];
1305 if (re_node_set_contains (&mctx
->state_log
[*pidx
]->nodes
,
1312 || check_node_accept (mctx
, dfa
->nodes
+ node
, *pidx
))
1314 Idx dest_node
= dfa
->nexts
[node
];
1315 *pidx
= (naccepted
== 0) ? *pidx
+ 1 : *pidx
+ naccepted
;
1316 if (fs
&& (*pidx
> mctx
->match_last
|| mctx
->state_log
[*pidx
] == NULL
1317 || !re_node_set_contains (&mctx
->state_log
[*pidx
]->nodes
,
1320 re_node_set_empty (eps_via_nodes
);
1327 static reg_errcode_t
1328 __attribute_warn_unused_result__
1329 push_fail_stack (struct re_fail_stack_t
*fs
, Idx str_idx
, Idx dest_node
,
1330 Idx nregs
, regmatch_t
*regs
, re_node_set
*eps_via_nodes
)
1333 Idx num
= fs
->num
++;
1334 if (fs
->num
== fs
->alloc
)
1336 struct re_fail_stack_ent_t
*new_array
;
1337 new_array
= re_realloc (fs
->stack
, struct re_fail_stack_ent_t
,
1339 if (new_array
== NULL
)
1342 fs
->stack
= new_array
;
1344 fs
->stack
[num
].idx
= str_idx
;
1345 fs
->stack
[num
].node
= dest_node
;
1346 fs
->stack
[num
].regs
= re_malloc (regmatch_t
, nregs
);
1347 if (fs
->stack
[num
].regs
== NULL
)
1349 memcpy (fs
->stack
[num
].regs
, regs
, sizeof (regmatch_t
) * nregs
);
1350 err
= re_node_set_init_copy (&fs
->stack
[num
].eps_via_nodes
, eps_via_nodes
);
1355 pop_fail_stack (struct re_fail_stack_t
*fs
, Idx
*pidx
, Idx nregs
,
1356 regmatch_t
*regs
, re_node_set
*eps_via_nodes
)
1358 Idx num
= --fs
->num
;
1360 *pidx
= fs
->stack
[num
].idx
;
1361 memcpy (regs
, fs
->stack
[num
].regs
, sizeof (regmatch_t
) * nregs
);
1362 re_node_set_free (eps_via_nodes
);
1363 re_free (fs
->stack
[num
].regs
);
1364 *eps_via_nodes
= fs
->stack
[num
].eps_via_nodes
;
1365 return fs
->stack
[num
].node
;
1368 /* Set the positions where the subexpressions are starts/ends to registers
1370 Note: We assume that pmatch[0] is already set, and
1371 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1373 static reg_errcode_t
1374 __attribute_warn_unused_result__
1375 set_regs (const regex_t
*preg
, const re_match_context_t
*mctx
, size_t nmatch
,
1376 regmatch_t
*pmatch
, bool fl_backtrack
)
1378 const re_dfa_t
*dfa
= preg
->buffer
;
1380 re_node_set eps_via_nodes
;
1381 struct re_fail_stack_t
*fs
;
1382 struct re_fail_stack_t fs_body
= { 0, 2, NULL
};
1383 regmatch_t
*prev_idx_match
;
1384 bool prev_idx_match_malloced
= false;
1387 assert (nmatch
> 1);
1388 assert (mctx
->state_log
!= NULL
);
1393 fs
->stack
= re_malloc (struct re_fail_stack_ent_t
, fs
->alloc
);
1394 if (fs
->stack
== NULL
)
1400 cur_node
= dfa
->init_node
;
1401 re_node_set_init_empty (&eps_via_nodes
);
1403 if (__libc_use_alloca (nmatch
* sizeof (regmatch_t
)))
1404 prev_idx_match
= (regmatch_t
*) alloca (nmatch
* sizeof (regmatch_t
));
1407 prev_idx_match
= re_malloc (regmatch_t
, nmatch
);
1408 if (prev_idx_match
== NULL
)
1410 free_fail_stack_return (fs
);
1413 prev_idx_match_malloced
= true;
1415 memcpy (prev_idx_match
, pmatch
, sizeof (regmatch_t
) * nmatch
);
1417 for (idx
= pmatch
[0].rm_so
; idx
<= pmatch
[0].rm_eo
;)
1419 update_regs (dfa
, pmatch
, prev_idx_match
, cur_node
, idx
, nmatch
);
1421 if (idx
== pmatch
[0].rm_eo
&& cur_node
== mctx
->last_node
)
1426 for (reg_idx
= 0; reg_idx
< nmatch
; ++reg_idx
)
1427 if (pmatch
[reg_idx
].rm_so
> -1 && pmatch
[reg_idx
].rm_eo
== -1)
1429 if (reg_idx
== nmatch
)
1431 re_node_set_free (&eps_via_nodes
);
1432 if (prev_idx_match_malloced
)
1433 re_free (prev_idx_match
);
1434 return free_fail_stack_return (fs
);
1436 cur_node
= pop_fail_stack (fs
, &idx
, nmatch
, pmatch
,
1441 re_node_set_free (&eps_via_nodes
);
1442 if (prev_idx_match_malloced
)
1443 re_free (prev_idx_match
);
1448 /* Proceed to next node. */
1449 cur_node
= proceed_next_node (mctx
, nmatch
, pmatch
, &idx
, cur_node
,
1450 &eps_via_nodes
, fs
);
1452 if (BE (cur_node
< 0, 0))
1454 if (BE (cur_node
== -2, 0))
1456 re_node_set_free (&eps_via_nodes
);
1457 if (prev_idx_match_malloced
)
1458 re_free (prev_idx_match
);
1459 free_fail_stack_return (fs
);
1463 cur_node
= pop_fail_stack (fs
, &idx
, nmatch
, pmatch
,
1467 re_node_set_free (&eps_via_nodes
);
1468 if (prev_idx_match_malloced
)
1469 re_free (prev_idx_match
);
1474 re_node_set_free (&eps_via_nodes
);
1475 if (prev_idx_match_malloced
)
1476 re_free (prev_idx_match
);
1477 return free_fail_stack_return (fs
);
1480 static reg_errcode_t
1481 free_fail_stack_return (struct re_fail_stack_t
*fs
)
1486 for (fs_idx
= 0; fs_idx
< fs
->num
; ++fs_idx
)
1488 re_node_set_free (&fs
->stack
[fs_idx
].eps_via_nodes
);
1489 re_free (fs
->stack
[fs_idx
].regs
);
1491 re_free (fs
->stack
);
1497 update_regs (const re_dfa_t
*dfa
, regmatch_t
*pmatch
,
1498 regmatch_t
*prev_idx_match
, Idx cur_node
, Idx cur_idx
, Idx nmatch
)
1500 int type
= dfa
->nodes
[cur_node
].type
;
1501 if (type
== OP_OPEN_SUBEXP
)
1503 Idx reg_num
= dfa
->nodes
[cur_node
].opr
.idx
+ 1;
1505 /* We are at the first node of this sub expression. */
1506 if (reg_num
< nmatch
)
1508 pmatch
[reg_num
].rm_so
= cur_idx
;
1509 pmatch
[reg_num
].rm_eo
= -1;
1512 else if (type
== OP_CLOSE_SUBEXP
)
1514 Idx reg_num
= dfa
->nodes
[cur_node
].opr
.idx
+ 1;
1515 if (reg_num
< nmatch
)
1517 /* We are at the last node of this sub expression. */
1518 if (pmatch
[reg_num
].rm_so
< cur_idx
)
1520 pmatch
[reg_num
].rm_eo
= cur_idx
;
1521 /* This is a non-empty match or we are not inside an optional
1522 subexpression. Accept this right away. */
1523 memcpy (prev_idx_match
, pmatch
, sizeof (regmatch_t
) * nmatch
);
1527 if (dfa
->nodes
[cur_node
].opt_subexp
1528 && prev_idx_match
[reg_num
].rm_so
!= -1)
1529 /* We transited through an empty match for an optional
1530 subexpression, like (a?)*, and this is not the subexp's
1531 first match. Copy back the old content of the registers
1532 so that matches of an inner subexpression are undone as
1533 well, like in ((a?))*. */
1534 memcpy (pmatch
, prev_idx_match
, sizeof (regmatch_t
) * nmatch
);
1536 /* We completed a subexpression, but it may be part of
1537 an optional one, so do not update PREV_IDX_MATCH. */
1538 pmatch
[reg_num
].rm_eo
= cur_idx
;
1544 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1545 and sift the nodes in each states according to the following rules.
1546 Updated state_log will be wrote to STATE_LOG.
1548 Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1549 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1550 If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1551 the LAST_NODE, we throw away the node 'a'.
1552 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1553 string 's' and transit to 'b':
1554 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1556 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1557 thrown away, we throw away the node 'a'.
1558 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1559 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1561 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1562 we throw away the node 'a'. */
1564 #define STATE_NODE_CONTAINS(state,node) \
1565 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1567 static reg_errcode_t
1568 sift_states_backward (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
)
1572 Idx str_idx
= sctx
->last_str_idx
;
1573 re_node_set cur_dest
;
1576 assert (mctx
->state_log
!= NULL
&& mctx
->state_log
[str_idx
] != NULL
);
1579 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1580 transit to the last_node and the last_node itself. */
1581 err
= re_node_set_init_1 (&cur_dest
, sctx
->last_node
);
1582 if (BE (err
!= REG_NOERROR
, 0))
1584 err
= update_cur_sifted_state (mctx
, sctx
, str_idx
, &cur_dest
);
1585 if (BE (err
!= REG_NOERROR
, 0))
1588 /* Then check each states in the state_log. */
1591 /* Update counters. */
1592 null_cnt
= (sctx
->sifted_states
[str_idx
] == NULL
) ? null_cnt
+ 1 : 0;
1593 if (null_cnt
> mctx
->max_mb_elem_len
)
1595 memset (sctx
->sifted_states
, '\0',
1596 sizeof (re_dfastate_t
*) * str_idx
);
1597 re_node_set_free (&cur_dest
);
1600 re_node_set_empty (&cur_dest
);
1603 if (mctx
->state_log
[str_idx
])
1605 err
= build_sifted_states (mctx
, sctx
, str_idx
, &cur_dest
);
1606 if (BE (err
!= REG_NOERROR
, 0))
1610 /* Add all the nodes which satisfy the following conditions:
1611 - It can epsilon transit to a node in CUR_DEST.
1613 And update state_log. */
1614 err
= update_cur_sifted_state (mctx
, sctx
, str_idx
, &cur_dest
);
1615 if (BE (err
!= REG_NOERROR
, 0))
1620 re_node_set_free (&cur_dest
);
1624 static reg_errcode_t
1625 __attribute_warn_unused_result__
1626 build_sifted_states (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
1627 Idx str_idx
, re_node_set
*cur_dest
)
1629 const re_dfa_t
*const dfa
= mctx
->dfa
;
1630 const re_node_set
*cur_src
= &mctx
->state_log
[str_idx
]->non_eps_nodes
;
1633 /* Then build the next sifted state.
1634 We build the next sifted state on 'cur_dest', and update
1635 'sifted_states[str_idx]' with 'cur_dest'.
1637 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1638 'cur_src' points the node_set of the old 'state_log[str_idx]'
1639 (with the epsilon nodes pre-filtered out). */
1640 for (i
= 0; i
< cur_src
->nelem
; i
++)
1642 Idx prev_node
= cur_src
->elems
[i
];
1647 re_token_type_t type
= dfa
->nodes
[prev_node
].type
;
1648 assert (!IS_EPSILON_NODE (type
));
1650 #ifdef RE_ENABLE_I18N
1651 /* If the node may accept "multi byte". */
1652 if (dfa
->nodes
[prev_node
].accept_mb
)
1653 naccepted
= sift_states_iter_mb (mctx
, sctx
, prev_node
,
1654 str_idx
, sctx
->last_str_idx
);
1655 #endif /* RE_ENABLE_I18N */
1657 /* We don't check backreferences here.
1658 See update_cur_sifted_state(). */
1660 && check_node_accept (mctx
, dfa
->nodes
+ prev_node
, str_idx
)
1661 && STATE_NODE_CONTAINS (sctx
->sifted_states
[str_idx
+ 1],
1662 dfa
->nexts
[prev_node
]))
1668 if (sctx
->limits
.nelem
)
1670 Idx to_idx
= str_idx
+ naccepted
;
1671 if (check_dst_limits (mctx
, &sctx
->limits
,
1672 dfa
->nexts
[prev_node
], to_idx
,
1673 prev_node
, str_idx
))
1676 ok
= re_node_set_insert (cur_dest
, prev_node
);
1684 /* Helper functions. */
1686 static reg_errcode_t
1687 clean_state_log_if_needed (re_match_context_t
*mctx
, Idx next_state_log_idx
)
1689 Idx top
= mctx
->state_log_top
;
1691 if ((next_state_log_idx
>= mctx
->input
.bufs_len
1692 && mctx
->input
.bufs_len
< mctx
->input
.len
)
1693 || (next_state_log_idx
>= mctx
->input
.valid_len
1694 && mctx
->input
.valid_len
< mctx
->input
.len
))
1697 err
= extend_buffers (mctx
, next_state_log_idx
+ 1);
1698 if (BE (err
!= REG_NOERROR
, 0))
1702 if (top
< next_state_log_idx
)
1704 memset (mctx
->state_log
+ top
+ 1, '\0',
1705 sizeof (re_dfastate_t
*) * (next_state_log_idx
- top
));
1706 mctx
->state_log_top
= next_state_log_idx
;
1711 static reg_errcode_t
1712 merge_state_array (const re_dfa_t
*dfa
, re_dfastate_t
**dst
,
1713 re_dfastate_t
**src
, Idx num
)
1717 for (st_idx
= 0; st_idx
< num
; ++st_idx
)
1719 if (dst
[st_idx
] == NULL
)
1720 dst
[st_idx
] = src
[st_idx
];
1721 else if (src
[st_idx
] != NULL
)
1723 re_node_set merged_set
;
1724 err
= re_node_set_init_union (&merged_set
, &dst
[st_idx
]->nodes
,
1725 &src
[st_idx
]->nodes
);
1726 if (BE (err
!= REG_NOERROR
, 0))
1728 dst
[st_idx
] = re_acquire_state (&err
, dfa
, &merged_set
);
1729 re_node_set_free (&merged_set
);
1730 if (BE (err
!= REG_NOERROR
, 0))
1737 static reg_errcode_t
1738 update_cur_sifted_state (const re_match_context_t
*mctx
,
1739 re_sift_context_t
*sctx
, Idx str_idx
,
1740 re_node_set
*dest_nodes
)
1742 const re_dfa_t
*const dfa
= mctx
->dfa
;
1743 reg_errcode_t err
= REG_NOERROR
;
1744 const re_node_set
*candidates
;
1745 candidates
= ((mctx
->state_log
[str_idx
] == NULL
) ? NULL
1746 : &mctx
->state_log
[str_idx
]->nodes
);
1748 if (dest_nodes
->nelem
== 0)
1749 sctx
->sifted_states
[str_idx
] = NULL
;
1754 /* At first, add the nodes which can epsilon transit to a node in
1756 err
= add_epsilon_src_nodes (dfa
, dest_nodes
, candidates
);
1757 if (BE (err
!= REG_NOERROR
, 0))
1760 /* Then, check the limitations in the current sift_context. */
1761 if (sctx
->limits
.nelem
)
1763 err
= check_subexp_limits (dfa
, dest_nodes
, candidates
, &sctx
->limits
,
1764 mctx
->bkref_ents
, str_idx
);
1765 if (BE (err
!= REG_NOERROR
, 0))
1770 sctx
->sifted_states
[str_idx
] = re_acquire_state (&err
, dfa
, dest_nodes
);
1771 if (BE (err
!= REG_NOERROR
, 0))
1775 if (candidates
&& mctx
->state_log
[str_idx
]->has_backref
)
1777 err
= sift_states_bkref (mctx
, sctx
, str_idx
, candidates
);
1778 if (BE (err
!= REG_NOERROR
, 0))
1784 static reg_errcode_t
1785 __attribute_warn_unused_result__
1786 add_epsilon_src_nodes (const re_dfa_t
*dfa
, re_node_set
*dest_nodes
,
1787 const re_node_set
*candidates
)
1789 reg_errcode_t err
= REG_NOERROR
;
1792 re_dfastate_t
*state
= re_acquire_state (&err
, dfa
, dest_nodes
);
1793 if (BE (err
!= REG_NOERROR
, 0))
1796 if (!state
->inveclosure
.alloc
)
1798 err
= re_node_set_alloc (&state
->inveclosure
, dest_nodes
->nelem
);
1799 if (BE (err
!= REG_NOERROR
, 0))
1801 for (i
= 0; i
< dest_nodes
->nelem
; i
++)
1803 err
= re_node_set_merge (&state
->inveclosure
,
1804 dfa
->inveclosures
+ dest_nodes
->elems
[i
]);
1805 if (BE (err
!= REG_NOERROR
, 0))
1809 return re_node_set_add_intersect (dest_nodes
, candidates
,
1810 &state
->inveclosure
);
1813 static reg_errcode_t
1814 sub_epsilon_src_nodes (const re_dfa_t
*dfa
, Idx node
, re_node_set
*dest_nodes
,
1815 const re_node_set
*candidates
)
1819 re_node_set
*inv_eclosure
= dfa
->inveclosures
+ node
;
1820 re_node_set except_nodes
;
1821 re_node_set_init_empty (&except_nodes
);
1822 for (ecl_idx
= 0; ecl_idx
< inv_eclosure
->nelem
; ++ecl_idx
)
1824 Idx cur_node
= inv_eclosure
->elems
[ecl_idx
];
1825 if (cur_node
== node
)
1827 if (IS_EPSILON_NODE (dfa
->nodes
[cur_node
].type
))
1829 Idx edst1
= dfa
->edests
[cur_node
].elems
[0];
1830 Idx edst2
= ((dfa
->edests
[cur_node
].nelem
> 1)
1831 ? dfa
->edests
[cur_node
].elems
[1] : -1);
1832 if ((!re_node_set_contains (inv_eclosure
, edst1
)
1833 && re_node_set_contains (dest_nodes
, edst1
))
1835 && !re_node_set_contains (inv_eclosure
, edst2
)
1836 && re_node_set_contains (dest_nodes
, edst2
)))
1838 err
= re_node_set_add_intersect (&except_nodes
, candidates
,
1839 dfa
->inveclosures
+ cur_node
);
1840 if (BE (err
!= REG_NOERROR
, 0))
1842 re_node_set_free (&except_nodes
);
1848 for (ecl_idx
= 0; ecl_idx
< inv_eclosure
->nelem
; ++ecl_idx
)
1850 Idx cur_node
= inv_eclosure
->elems
[ecl_idx
];
1851 if (!re_node_set_contains (&except_nodes
, cur_node
))
1853 Idx idx
= re_node_set_contains (dest_nodes
, cur_node
) - 1;
1854 re_node_set_remove_at (dest_nodes
, idx
);
1857 re_node_set_free (&except_nodes
);
1862 check_dst_limits (const re_match_context_t
*mctx
, const re_node_set
*limits
,
1863 Idx dst_node
, Idx dst_idx
, Idx src_node
, Idx src_idx
)
1865 const re_dfa_t
*const dfa
= mctx
->dfa
;
1866 Idx lim_idx
, src_pos
, dst_pos
;
1868 Idx dst_bkref_idx
= search_cur_bkref_entry (mctx
, dst_idx
);
1869 Idx src_bkref_idx
= search_cur_bkref_entry (mctx
, src_idx
);
1870 for (lim_idx
= 0; lim_idx
< limits
->nelem
; ++lim_idx
)
1873 struct re_backref_cache_entry
*ent
;
1874 ent
= mctx
->bkref_ents
+ limits
->elems
[lim_idx
];
1875 subexp_idx
= dfa
->nodes
[ent
->node
].opr
.idx
;
1877 dst_pos
= check_dst_limits_calc_pos (mctx
, limits
->elems
[lim_idx
],
1878 subexp_idx
, dst_node
, dst_idx
,
1880 src_pos
= check_dst_limits_calc_pos (mctx
, limits
->elems
[lim_idx
],
1881 subexp_idx
, src_node
, src_idx
,
1885 <src> <dst> ( <subexp> )
1886 ( <subexp> ) <src> <dst>
1887 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1888 if (src_pos
== dst_pos
)
1889 continue; /* This is unrelated limitation. */
1897 check_dst_limits_calc_pos_1 (const re_match_context_t
*mctx
, int boundaries
,
1898 Idx subexp_idx
, Idx from_node
, Idx bkref_idx
)
1900 const re_dfa_t
*const dfa
= mctx
->dfa
;
1901 const re_node_set
*eclosures
= dfa
->eclosures
+ from_node
;
1904 /* Else, we are on the boundary: examine the nodes on the epsilon
1906 for (node_idx
= 0; node_idx
< eclosures
->nelem
; ++node_idx
)
1908 Idx node
= eclosures
->elems
[node_idx
];
1909 switch (dfa
->nodes
[node
].type
)
1912 if (bkref_idx
!= -1)
1914 struct re_backref_cache_entry
*ent
= mctx
->bkref_ents
+ bkref_idx
;
1920 if (ent
->node
!= node
)
1923 if (subexp_idx
< BITSET_WORD_BITS
1924 && !(ent
->eps_reachable_subexps_map
1925 & ((bitset_word_t
) 1 << subexp_idx
)))
1928 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1929 OP_CLOSE_SUBEXP cases below. But, if the
1930 destination node is the same node as the source
1931 node, don't recurse because it would cause an
1932 infinite loop: a regex that exhibits this behavior
1934 dst
= dfa
->edests
[node
].elems
[0];
1935 if (dst
== from_node
)
1939 else /* if (boundaries & 2) */
1944 check_dst_limits_calc_pos_1 (mctx
, boundaries
, subexp_idx
,
1946 if (cpos
== -1 /* && (boundaries & 1) */)
1948 if (cpos
== 0 && (boundaries
& 2))
1951 if (subexp_idx
< BITSET_WORD_BITS
)
1952 ent
->eps_reachable_subexps_map
1953 &= ~((bitset_word_t
) 1 << subexp_idx
);
1955 while (ent
++->more
);
1959 case OP_OPEN_SUBEXP
:
1960 if ((boundaries
& 1) && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
1964 case OP_CLOSE_SUBEXP
:
1965 if ((boundaries
& 2) && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
1974 return (boundaries
& 2) ? 1 : 0;
1978 check_dst_limits_calc_pos (const re_match_context_t
*mctx
, Idx limit
,
1979 Idx subexp_idx
, Idx from_node
, Idx str_idx
,
1982 struct re_backref_cache_entry
*lim
= mctx
->bkref_ents
+ limit
;
1985 /* If we are outside the range of the subexpression, return -1 or 1. */
1986 if (str_idx
< lim
->subexp_from
)
1989 if (lim
->subexp_to
< str_idx
)
1992 /* If we are within the subexpression, return 0. */
1993 boundaries
= (str_idx
== lim
->subexp_from
);
1994 boundaries
|= (str_idx
== lim
->subexp_to
) << 1;
1995 if (boundaries
== 0)
1998 /* Else, examine epsilon closure. */
1999 return check_dst_limits_calc_pos_1 (mctx
, boundaries
, subexp_idx
,
2000 from_node
, bkref_idx
);
2003 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2004 which are against limitations from DEST_NODES. */
2006 static reg_errcode_t
2007 check_subexp_limits (const re_dfa_t
*dfa
, re_node_set
*dest_nodes
,
2008 const re_node_set
*candidates
, re_node_set
*limits
,
2009 struct re_backref_cache_entry
*bkref_ents
, Idx str_idx
)
2012 Idx node_idx
, lim_idx
;
2014 for (lim_idx
= 0; lim_idx
< limits
->nelem
; ++lim_idx
)
2017 struct re_backref_cache_entry
*ent
;
2018 ent
= bkref_ents
+ limits
->elems
[lim_idx
];
2020 if (str_idx
<= ent
->subexp_from
|| ent
->str_idx
< str_idx
)
2021 continue; /* This is unrelated limitation. */
2023 subexp_idx
= dfa
->nodes
[ent
->node
].opr
.idx
;
2024 if (ent
->subexp_to
== str_idx
)
2028 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2030 Idx node
= dest_nodes
->elems
[node_idx
];
2031 re_token_type_t type
= dfa
->nodes
[node
].type
;
2032 if (type
== OP_OPEN_SUBEXP
2033 && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2035 else if (type
== OP_CLOSE_SUBEXP
2036 && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2040 /* Check the limitation of the open subexpression. */
2041 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2044 err
= sub_epsilon_src_nodes (dfa
, ops_node
, dest_nodes
,
2046 if (BE (err
!= REG_NOERROR
, 0))
2050 /* Check the limitation of the close subexpression. */
2052 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2054 Idx node
= dest_nodes
->elems
[node_idx
];
2055 if (!re_node_set_contains (dfa
->inveclosures
+ node
,
2057 && !re_node_set_contains (dfa
->eclosures
+ node
,
2060 /* It is against this limitation.
2061 Remove it form the current sifted state. */
2062 err
= sub_epsilon_src_nodes (dfa
, node
, dest_nodes
,
2064 if (BE (err
!= REG_NOERROR
, 0))
2070 else /* (ent->subexp_to != str_idx) */
2072 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2074 Idx node
= dest_nodes
->elems
[node_idx
];
2075 re_token_type_t type
= dfa
->nodes
[node
].type
;
2076 if (type
== OP_CLOSE_SUBEXP
|| type
== OP_OPEN_SUBEXP
)
2078 if (subexp_idx
!= dfa
->nodes
[node
].opr
.idx
)
2080 /* It is against this limitation.
2081 Remove it form the current sifted state. */
2082 err
= sub_epsilon_src_nodes (dfa
, node
, dest_nodes
,
2084 if (BE (err
!= REG_NOERROR
, 0))
2093 static reg_errcode_t
2094 __attribute_warn_unused_result__
2095 sift_states_bkref (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
2096 Idx str_idx
, const re_node_set
*candidates
)
2098 const re_dfa_t
*const dfa
= mctx
->dfa
;
2101 re_sift_context_t local_sctx
;
2102 Idx first_idx
= search_cur_bkref_entry (mctx
, str_idx
);
2104 if (first_idx
== -1)
2107 local_sctx
.sifted_states
= NULL
; /* Mark that it hasn't been initialized. */
2109 for (node_idx
= 0; node_idx
< candidates
->nelem
; ++node_idx
)
2112 re_token_type_t type
;
2113 struct re_backref_cache_entry
*entry
;
2114 node
= candidates
->elems
[node_idx
];
2115 type
= dfa
->nodes
[node
].type
;
2116 /* Avoid infinite loop for the REs like "()\1+". */
2117 if (node
== sctx
->last_node
&& str_idx
== sctx
->last_str_idx
)
2119 if (type
!= OP_BACK_REF
)
2122 entry
= mctx
->bkref_ents
+ first_idx
;
2123 enabled_idx
= first_idx
;
2130 re_dfastate_t
*cur_state
;
2132 if (entry
->node
!= node
)
2134 subexp_len
= entry
->subexp_to
- entry
->subexp_from
;
2135 to_idx
= str_idx
+ subexp_len
;
2136 dst_node
= (subexp_len
? dfa
->nexts
[node
]
2137 : dfa
->edests
[node
].elems
[0]);
2139 if (to_idx
> sctx
->last_str_idx
2140 || sctx
->sifted_states
[to_idx
] == NULL
2141 || !STATE_NODE_CONTAINS (sctx
->sifted_states
[to_idx
], dst_node
)
2142 || check_dst_limits (mctx
, &sctx
->limits
, node
,
2143 str_idx
, dst_node
, to_idx
))
2146 if (local_sctx
.sifted_states
== NULL
)
2149 err
= re_node_set_init_copy (&local_sctx
.limits
, &sctx
->limits
);
2150 if (BE (err
!= REG_NOERROR
, 0))
2153 local_sctx
.last_node
= node
;
2154 local_sctx
.last_str_idx
= str_idx
;
2155 ok
= re_node_set_insert (&local_sctx
.limits
, enabled_idx
);
2161 cur_state
= local_sctx
.sifted_states
[str_idx
];
2162 err
= sift_states_backward (mctx
, &local_sctx
);
2163 if (BE (err
!= REG_NOERROR
, 0))
2165 if (sctx
->limited_states
!= NULL
)
2167 err
= merge_state_array (dfa
, sctx
->limited_states
,
2168 local_sctx
.sifted_states
,
2170 if (BE (err
!= REG_NOERROR
, 0))
2173 local_sctx
.sifted_states
[str_idx
] = cur_state
;
2174 re_node_set_remove (&local_sctx
.limits
, enabled_idx
);
2176 /* mctx->bkref_ents may have changed, reload the pointer. */
2177 entry
= mctx
->bkref_ents
+ enabled_idx
;
2179 while (enabled_idx
++, entry
++->more
);
2183 if (local_sctx
.sifted_states
!= NULL
)
2185 re_node_set_free (&local_sctx
.limits
);
2192 #ifdef RE_ENABLE_I18N
2194 sift_states_iter_mb (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
2195 Idx node_idx
, Idx str_idx
, Idx max_str_idx
)
2197 const re_dfa_t
*const dfa
= mctx
->dfa
;
2199 /* Check the node can accept "multi byte". */
2200 naccepted
= check_node_accept_bytes (dfa
, node_idx
, &mctx
->input
, str_idx
);
2201 if (naccepted
> 0 && str_idx
+ naccepted
<= max_str_idx
&&
2202 !STATE_NODE_CONTAINS (sctx
->sifted_states
[str_idx
+ naccepted
],
2203 dfa
->nexts
[node_idx
]))
2204 /* The node can't accept the "multi byte", or the
2205 destination was already thrown away, then the node
2206 could't accept the current input "multi byte". */
2208 /* Otherwise, it is sure that the node could accept
2209 'naccepted' bytes input. */
2212 #endif /* RE_ENABLE_I18N */
2215 /* Functions for state transition. */
2217 /* Return the next state to which the current state STATE will transit by
2218 accepting the current input byte, and update STATE_LOG if necessary.
2219 If STATE can accept a multibyte char/collating element/back reference
2220 update the destination of STATE_LOG. */
2222 static re_dfastate_t
*
2223 __attribute_warn_unused_result__
2224 transit_state (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2225 re_dfastate_t
*state
)
2227 re_dfastate_t
**trtable
;
2230 #ifdef RE_ENABLE_I18N
2231 /* If the current state can accept multibyte. */
2232 if (BE (state
->accept_mb
, 0))
2234 *err
= transit_state_mb (mctx
, state
);
2235 if (BE (*err
!= REG_NOERROR
, 0))
2238 #endif /* RE_ENABLE_I18N */
2240 /* Then decide the next state with the single byte. */
2243 /* don't use transition table */
2244 return transit_state_sb (err
, mctx
, state
);
2247 /* Use transition table */
2248 ch
= re_string_fetch_byte (&mctx
->input
);
2251 trtable
= state
->trtable
;
2252 if (BE (trtable
!= NULL
, 1))
2255 trtable
= state
->word_trtable
;
2256 if (BE (trtable
!= NULL
, 1))
2258 unsigned int context
;
2260 = re_string_context_at (&mctx
->input
,
2261 re_string_cur_idx (&mctx
->input
) - 1,
2263 if (IS_WORD_CONTEXT (context
))
2264 return trtable
[ch
+ SBC_MAX
];
2269 if (!build_trtable (mctx
->dfa
, state
))
2275 /* Retry, we now have a transition table. */
2279 /* Update the state_log if we need */
2280 static re_dfastate_t
*
2281 merge_state_with_log (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2282 re_dfastate_t
*next_state
)
2284 const re_dfa_t
*const dfa
= mctx
->dfa
;
2285 Idx cur_idx
= re_string_cur_idx (&mctx
->input
);
2287 if (cur_idx
> mctx
->state_log_top
)
2289 mctx
->state_log
[cur_idx
] = next_state
;
2290 mctx
->state_log_top
= cur_idx
;
2292 else if (mctx
->state_log
[cur_idx
] == 0)
2294 mctx
->state_log
[cur_idx
] = next_state
;
2298 re_dfastate_t
*pstate
;
2299 unsigned int context
;
2300 re_node_set next_nodes
, *log_nodes
, *table_nodes
= NULL
;
2301 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2302 the destination of a multibyte char/collating element/
2303 back reference. Then the next state is the union set of
2304 these destinations and the results of the transition table. */
2305 pstate
= mctx
->state_log
[cur_idx
];
2306 log_nodes
= pstate
->entrance_nodes
;
2307 if (next_state
!= NULL
)
2309 table_nodes
= next_state
->entrance_nodes
;
2310 *err
= re_node_set_init_union (&next_nodes
, table_nodes
,
2312 if (BE (*err
!= REG_NOERROR
, 0))
2316 next_nodes
= *log_nodes
;
2317 /* Note: We already add the nodes of the initial state,
2318 then we don't need to add them here. */
2320 context
= re_string_context_at (&mctx
->input
,
2321 re_string_cur_idx (&mctx
->input
) - 1,
2323 next_state
= mctx
->state_log
[cur_idx
]
2324 = re_acquire_state_context (err
, dfa
, &next_nodes
, context
);
2325 /* We don't need to check errors here, since the return value of
2326 this function is next_state and ERR is already set. */
2328 if (table_nodes
!= NULL
)
2329 re_node_set_free (&next_nodes
);
2332 if (BE (dfa
->nbackref
, 0) && next_state
!= NULL
)
2334 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2335 later. We must check them here, since the back references in the
2336 next state might use them. */
2337 *err
= check_subexp_matching_top (mctx
, &next_state
->nodes
,
2339 if (BE (*err
!= REG_NOERROR
, 0))
2342 /* If the next state has back references. */
2343 if (next_state
->has_backref
)
2345 *err
= transit_state_bkref (mctx
, &next_state
->nodes
);
2346 if (BE (*err
!= REG_NOERROR
, 0))
2348 next_state
= mctx
->state_log
[cur_idx
];
2355 /* Skip bytes in the input that correspond to part of a
2356 multi-byte match, then look in the log for a state
2357 from which to restart matching. */
2358 static re_dfastate_t
*
2359 find_recover_state (reg_errcode_t
*err
, re_match_context_t
*mctx
)
2361 re_dfastate_t
*cur_state
;
2364 Idx max
= mctx
->state_log_top
;
2365 Idx cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2369 if (++cur_str_idx
> max
)
2371 re_string_skip_bytes (&mctx
->input
, 1);
2373 while (mctx
->state_log
[cur_str_idx
] == NULL
);
2375 cur_state
= merge_state_with_log (err
, mctx
, NULL
);
2377 while (*err
== REG_NOERROR
&& cur_state
== NULL
);
2381 /* Helper functions for transit_state. */
2383 /* From the node set CUR_NODES, pick up the nodes whose types are
2384 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2385 expression. And register them to use them later for evaluating the
2386 corresponding back references. */
2388 static reg_errcode_t
2389 check_subexp_matching_top (re_match_context_t
*mctx
, re_node_set
*cur_nodes
,
2392 const re_dfa_t
*const dfa
= mctx
->dfa
;
2396 /* TODO: This isn't efficient.
2397 Because there might be more than one nodes whose types are
2398 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2401 for (node_idx
= 0; node_idx
< cur_nodes
->nelem
; ++node_idx
)
2403 Idx node
= cur_nodes
->elems
[node_idx
];
2404 if (dfa
->nodes
[node
].type
== OP_OPEN_SUBEXP
2405 && dfa
->nodes
[node
].opr
.idx
< BITSET_WORD_BITS
2406 && (dfa
->used_bkref_map
2407 & ((bitset_word_t
) 1 << dfa
->nodes
[node
].opr
.idx
)))
2409 err
= match_ctx_add_subtop (mctx
, node
, str_idx
);
2410 if (BE (err
!= REG_NOERROR
, 0))
2418 /* Return the next state to which the current state STATE will transit by
2419 accepting the current input byte. */
2421 static re_dfastate_t
*
2422 transit_state_sb (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2423 re_dfastate_t
*state
)
2425 const re_dfa_t
*const dfa
= mctx
->dfa
;
2426 re_node_set next_nodes
;
2427 re_dfastate_t
*next_state
;
2428 Idx node_cnt
, cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2429 unsigned int context
;
2431 *err
= re_node_set_alloc (&next_nodes
, state
->nodes
.nelem
+ 1);
2432 if (BE (*err
!= REG_NOERROR
, 0))
2434 for (node_cnt
= 0; node_cnt
< state
->nodes
.nelem
; ++node_cnt
)
2436 Idx cur_node
= state
->nodes
.elems
[node_cnt
];
2437 if (check_node_accept (mctx
, dfa
->nodes
+ cur_node
, cur_str_idx
))
2439 *err
= re_node_set_merge (&next_nodes
,
2440 dfa
->eclosures
+ dfa
->nexts
[cur_node
]);
2441 if (BE (*err
!= REG_NOERROR
, 0))
2443 re_node_set_free (&next_nodes
);
2448 context
= re_string_context_at (&mctx
->input
, cur_str_idx
, mctx
->eflags
);
2449 next_state
= re_acquire_state_context (err
, dfa
, &next_nodes
, context
);
2450 /* We don't need to check errors here, since the return value of
2451 this function is next_state and ERR is already set. */
2453 re_node_set_free (&next_nodes
);
2454 re_string_skip_bytes (&mctx
->input
, 1);
2459 #ifdef RE_ENABLE_I18N
2460 static reg_errcode_t
2461 transit_state_mb (re_match_context_t
*mctx
, re_dfastate_t
*pstate
)
2463 const re_dfa_t
*const dfa
= mctx
->dfa
;
2467 for (i
= 0; i
< pstate
->nodes
.nelem
; ++i
)
2469 re_node_set dest_nodes
, *new_nodes
;
2470 Idx cur_node_idx
= pstate
->nodes
.elems
[i
];
2473 unsigned int context
;
2474 re_dfastate_t
*dest_state
;
2476 if (!dfa
->nodes
[cur_node_idx
].accept_mb
)
2479 if (dfa
->nodes
[cur_node_idx
].constraint
)
2481 context
= re_string_context_at (&mctx
->input
,
2482 re_string_cur_idx (&mctx
->input
),
2484 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa
->nodes
[cur_node_idx
].constraint
,
2489 /* How many bytes the node can accept? */
2490 naccepted
= check_node_accept_bytes (dfa
, cur_node_idx
, &mctx
->input
,
2491 re_string_cur_idx (&mctx
->input
));
2495 /* The node can accepts 'naccepted' bytes. */
2496 dest_idx
= re_string_cur_idx (&mctx
->input
) + naccepted
;
2497 mctx
->max_mb_elem_len
= ((mctx
->max_mb_elem_len
< naccepted
) ? naccepted
2498 : mctx
->max_mb_elem_len
);
2499 err
= clean_state_log_if_needed (mctx
, dest_idx
);
2500 if (BE (err
!= REG_NOERROR
, 0))
2503 assert (dfa
->nexts
[cur_node_idx
] != -1);
2505 new_nodes
= dfa
->eclosures
+ dfa
->nexts
[cur_node_idx
];
2507 dest_state
= mctx
->state_log
[dest_idx
];
2508 if (dest_state
== NULL
)
2509 dest_nodes
= *new_nodes
;
2512 err
= re_node_set_init_union (&dest_nodes
,
2513 dest_state
->entrance_nodes
, new_nodes
);
2514 if (BE (err
!= REG_NOERROR
, 0))
2517 context
= re_string_context_at (&mctx
->input
, dest_idx
- 1,
2519 mctx
->state_log
[dest_idx
]
2520 = re_acquire_state_context (&err
, dfa
, &dest_nodes
, context
);
2521 if (dest_state
!= NULL
)
2522 re_node_set_free (&dest_nodes
);
2523 if (BE (mctx
->state_log
[dest_idx
] == NULL
&& err
!= REG_NOERROR
, 0))
2528 #endif /* RE_ENABLE_I18N */
2530 static reg_errcode_t
2531 transit_state_bkref (re_match_context_t
*mctx
, const re_node_set
*nodes
)
2533 const re_dfa_t
*const dfa
= mctx
->dfa
;
2536 Idx cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2538 for (i
= 0; i
< nodes
->nelem
; ++i
)
2540 Idx dest_str_idx
, prev_nelem
, bkc_idx
;
2541 Idx node_idx
= nodes
->elems
[i
];
2542 unsigned int context
;
2543 const re_token_t
*node
= dfa
->nodes
+ node_idx
;
2544 re_node_set
*new_dest_nodes
;
2546 /* Check whether 'node' is a backreference or not. */
2547 if (node
->type
!= OP_BACK_REF
)
2550 if (node
->constraint
)
2552 context
= re_string_context_at (&mctx
->input
, cur_str_idx
,
2554 if (NOT_SATISFY_NEXT_CONSTRAINT (node
->constraint
, context
))
2558 /* 'node' is a backreference.
2559 Check the substring which the substring matched. */
2560 bkc_idx
= mctx
->nbkref_ents
;
2561 err
= get_subexp (mctx
, node_idx
, cur_str_idx
);
2562 if (BE (err
!= REG_NOERROR
, 0))
2565 /* And add the epsilon closures (which is 'new_dest_nodes') of
2566 the backreference to appropriate state_log. */
2568 assert (dfa
->nexts
[node_idx
] != -1);
2570 for (; bkc_idx
< mctx
->nbkref_ents
; ++bkc_idx
)
2573 re_dfastate_t
*dest_state
;
2574 struct re_backref_cache_entry
*bkref_ent
;
2575 bkref_ent
= mctx
->bkref_ents
+ bkc_idx
;
2576 if (bkref_ent
->node
!= node_idx
|| bkref_ent
->str_idx
!= cur_str_idx
)
2578 subexp_len
= bkref_ent
->subexp_to
- bkref_ent
->subexp_from
;
2579 new_dest_nodes
= (subexp_len
== 0
2580 ? dfa
->eclosures
+ dfa
->edests
[node_idx
].elems
[0]
2581 : dfa
->eclosures
+ dfa
->nexts
[node_idx
]);
2582 dest_str_idx
= (cur_str_idx
+ bkref_ent
->subexp_to
2583 - bkref_ent
->subexp_from
);
2584 context
= re_string_context_at (&mctx
->input
, dest_str_idx
- 1,
2586 dest_state
= mctx
->state_log
[dest_str_idx
];
2587 prev_nelem
= ((mctx
->state_log
[cur_str_idx
] == NULL
) ? 0
2588 : mctx
->state_log
[cur_str_idx
]->nodes
.nelem
);
2589 /* Add 'new_dest_node' to state_log. */
2590 if (dest_state
== NULL
)
2592 mctx
->state_log
[dest_str_idx
]
2593 = re_acquire_state_context (&err
, dfa
, new_dest_nodes
,
2595 if (BE (mctx
->state_log
[dest_str_idx
] == NULL
2596 && err
!= REG_NOERROR
, 0))
2601 re_node_set dest_nodes
;
2602 err
= re_node_set_init_union (&dest_nodes
,
2603 dest_state
->entrance_nodes
,
2605 if (BE (err
!= REG_NOERROR
, 0))
2607 re_node_set_free (&dest_nodes
);
2610 mctx
->state_log
[dest_str_idx
]
2611 = re_acquire_state_context (&err
, dfa
, &dest_nodes
, context
);
2612 re_node_set_free (&dest_nodes
);
2613 if (BE (mctx
->state_log
[dest_str_idx
] == NULL
2614 && err
!= REG_NOERROR
, 0))
2617 /* We need to check recursively if the backreference can epsilon
2620 && mctx
->state_log
[cur_str_idx
]->nodes
.nelem
> prev_nelem
)
2622 err
= check_subexp_matching_top (mctx
, new_dest_nodes
,
2624 if (BE (err
!= REG_NOERROR
, 0))
2626 err
= transit_state_bkref (mctx
, new_dest_nodes
);
2627 if (BE (err
!= REG_NOERROR
, 0))
2637 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2638 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2639 Note that we might collect inappropriate candidates here.
2640 However, the cost of checking them strictly here is too high, then we
2641 delay these checking for prune_impossible_nodes(). */
2643 static reg_errcode_t
2644 __attribute_warn_unused_result__
2645 get_subexp (re_match_context_t
*mctx
, Idx bkref_node
, Idx bkref_str_idx
)
2647 const re_dfa_t
*const dfa
= mctx
->dfa
;
2648 Idx subexp_num
, sub_top_idx
;
2649 const char *buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2650 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2651 Idx cache_idx
= search_cur_bkref_entry (mctx
, bkref_str_idx
);
2652 if (cache_idx
!= -1)
2654 const struct re_backref_cache_entry
*entry
2655 = mctx
->bkref_ents
+ cache_idx
;
2657 if (entry
->node
== bkref_node
)
2658 return REG_NOERROR
; /* We already checked it. */
2659 while (entry
++->more
);
2662 subexp_num
= dfa
->nodes
[bkref_node
].opr
.idx
;
2664 /* For each sub expression */
2665 for (sub_top_idx
= 0; sub_top_idx
< mctx
->nsub_tops
; ++sub_top_idx
)
2668 re_sub_match_top_t
*sub_top
= mctx
->sub_tops
[sub_top_idx
];
2669 re_sub_match_last_t
*sub_last
;
2670 Idx sub_last_idx
, sl_str
, bkref_str_off
;
2672 if (dfa
->nodes
[sub_top
->node
].opr
.idx
!= subexp_num
)
2673 continue; /* It isn't related. */
2675 sl_str
= sub_top
->str_idx
;
2676 bkref_str_off
= bkref_str_idx
;
2677 /* At first, check the last node of sub expressions we already
2679 for (sub_last_idx
= 0; sub_last_idx
< sub_top
->nlasts
; ++sub_last_idx
)
2681 regoff_t sl_str_diff
;
2682 sub_last
= sub_top
->lasts
[sub_last_idx
];
2683 sl_str_diff
= sub_last
->str_idx
- sl_str
;
2684 /* The matched string by the sub expression match with the substring
2685 at the back reference? */
2686 if (sl_str_diff
> 0)
2688 if (BE (bkref_str_off
+ sl_str_diff
> mctx
->input
.valid_len
, 0))
2690 /* Not enough chars for a successful match. */
2691 if (bkref_str_off
+ sl_str_diff
> mctx
->input
.len
)
2694 err
= clean_state_log_if_needed (mctx
,
2697 if (BE (err
!= REG_NOERROR
, 0))
2699 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2701 if (memcmp (buf
+ bkref_str_off
, buf
+ sl_str
, sl_str_diff
) != 0)
2702 /* We don't need to search this sub expression any more. */
2705 bkref_str_off
+= sl_str_diff
;
2706 sl_str
+= sl_str_diff
;
2707 err
= get_subexp_sub (mctx
, sub_top
, sub_last
, bkref_node
,
2710 /* Reload buf, since the preceding call might have reallocated
2712 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2714 if (err
== REG_NOMATCH
)
2716 if (BE (err
!= REG_NOERROR
, 0))
2720 if (sub_last_idx
< sub_top
->nlasts
)
2722 if (sub_last_idx
> 0)
2724 /* Then, search for the other last nodes of the sub expression. */
2725 for (; sl_str
<= bkref_str_idx
; ++sl_str
)
2728 regoff_t sl_str_off
;
2729 const re_node_set
*nodes
;
2730 sl_str_off
= sl_str
- sub_top
->str_idx
;
2731 /* The matched string by the sub expression match with the substring
2732 at the back reference? */
2735 if (BE (bkref_str_off
>= mctx
->input
.valid_len
, 0))
2737 /* If we are at the end of the input, we cannot match. */
2738 if (bkref_str_off
>= mctx
->input
.len
)
2741 err
= extend_buffers (mctx
, bkref_str_off
+ 1);
2742 if (BE (err
!= REG_NOERROR
, 0))
2745 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2747 if (buf
[bkref_str_off
++] != buf
[sl_str
- 1])
2748 break; /* We don't need to search this sub expression
2751 if (mctx
->state_log
[sl_str
] == NULL
)
2753 /* Does this state have a ')' of the sub expression? */
2754 nodes
= &mctx
->state_log
[sl_str
]->nodes
;
2755 cls_node
= find_subexp_node (dfa
, nodes
, subexp_num
,
2759 if (sub_top
->path
== NULL
)
2761 sub_top
->path
= calloc (sizeof (state_array_t
),
2762 sl_str
- sub_top
->str_idx
+ 1);
2763 if (sub_top
->path
== NULL
)
2766 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2767 in the current context? */
2768 err
= check_arrival (mctx
, sub_top
->path
, sub_top
->node
,
2769 sub_top
->str_idx
, cls_node
, sl_str
,
2771 if (err
== REG_NOMATCH
)
2773 if (BE (err
!= REG_NOERROR
, 0))
2775 sub_last
= match_ctx_add_sublast (sub_top
, cls_node
, sl_str
);
2776 if (BE (sub_last
== NULL
, 0))
2778 err
= get_subexp_sub (mctx
, sub_top
, sub_last
, bkref_node
,
2780 if (err
== REG_NOMATCH
)
2787 /* Helper functions for get_subexp(). */
2789 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2790 If it can arrive, register the sub expression expressed with SUB_TOP
2793 static reg_errcode_t
2794 get_subexp_sub (re_match_context_t
*mctx
, const re_sub_match_top_t
*sub_top
,
2795 re_sub_match_last_t
*sub_last
, Idx bkref_node
, Idx bkref_str
)
2799 /* Can the subexpression arrive the back reference? */
2800 err
= check_arrival (mctx
, &sub_last
->path
, sub_last
->node
,
2801 sub_last
->str_idx
, bkref_node
, bkref_str
,
2803 if (err
!= REG_NOERROR
)
2805 err
= match_ctx_add_entry (mctx
, bkref_node
, bkref_str
, sub_top
->str_idx
,
2807 if (BE (err
!= REG_NOERROR
, 0))
2809 to_idx
= bkref_str
+ sub_last
->str_idx
- sub_top
->str_idx
;
2810 return clean_state_log_if_needed (mctx
, to_idx
);
2813 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2814 Search '(' if FL_OPEN, or search ')' otherwise.
2815 TODO: This function isn't efficient...
2816 Because there might be more than one nodes whose types are
2817 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2822 find_subexp_node (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
2823 Idx subexp_idx
, int type
)
2826 for (cls_idx
= 0; cls_idx
< nodes
->nelem
; ++cls_idx
)
2828 Idx cls_node
= nodes
->elems
[cls_idx
];
2829 const re_token_t
*node
= dfa
->nodes
+ cls_node
;
2830 if (node
->type
== type
2831 && node
->opr
.idx
== subexp_idx
)
2837 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2838 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2840 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2842 static reg_errcode_t
2843 __attribute_warn_unused_result__
2844 check_arrival (re_match_context_t
*mctx
, state_array_t
*path
, Idx top_node
,
2845 Idx top_str
, Idx last_node
, Idx last_str
, int type
)
2847 const re_dfa_t
*const dfa
= mctx
->dfa
;
2848 reg_errcode_t err
= REG_NOERROR
;
2849 Idx subexp_num
, backup_cur_idx
, str_idx
, null_cnt
;
2850 re_dfastate_t
*cur_state
= NULL
;
2851 re_node_set
*cur_nodes
, next_nodes
;
2852 re_dfastate_t
**backup_state_log
;
2853 unsigned int context
;
2855 subexp_num
= dfa
->nodes
[top_node
].opr
.idx
;
2856 /* Extend the buffer if we need. */
2857 if (BE (path
->alloc
< last_str
+ mctx
->max_mb_elem_len
+ 1, 0))
2859 re_dfastate_t
**new_array
;
2860 Idx old_alloc
= path
->alloc
;
2861 Idx incr_alloc
= last_str
+ mctx
->max_mb_elem_len
+ 1;
2863 if (BE (IDX_MAX
- old_alloc
< incr_alloc
, 0))
2865 new_alloc
= old_alloc
+ incr_alloc
;
2866 if (BE (SIZE_MAX
/ sizeof (re_dfastate_t
*) < new_alloc
, 0))
2868 new_array
= re_realloc (path
->array
, re_dfastate_t
*, new_alloc
);
2869 if (BE (new_array
== NULL
, 0))
2871 path
->array
= new_array
;
2872 path
->alloc
= new_alloc
;
2873 memset (new_array
+ old_alloc
, '\0',
2874 sizeof (re_dfastate_t
*) * (path
->alloc
- old_alloc
));
2877 str_idx
= path
->next_idx
? path
->next_idx
: top_str
;
2879 /* Temporary modify MCTX. */
2880 backup_state_log
= mctx
->state_log
;
2881 backup_cur_idx
= mctx
->input
.cur_idx
;
2882 mctx
->state_log
= path
->array
;
2883 mctx
->input
.cur_idx
= str_idx
;
2885 /* Setup initial node set. */
2886 context
= re_string_context_at (&mctx
->input
, str_idx
- 1, mctx
->eflags
);
2887 if (str_idx
== top_str
)
2889 err
= re_node_set_init_1 (&next_nodes
, top_node
);
2890 if (BE (err
!= REG_NOERROR
, 0))
2892 err
= check_arrival_expand_ecl (dfa
, &next_nodes
, subexp_num
, type
);
2893 if (BE (err
!= REG_NOERROR
, 0))
2895 re_node_set_free (&next_nodes
);
2901 cur_state
= mctx
->state_log
[str_idx
];
2902 if (cur_state
&& cur_state
->has_backref
)
2904 err
= re_node_set_init_copy (&next_nodes
, &cur_state
->nodes
);
2905 if (BE (err
!= REG_NOERROR
, 0))
2909 re_node_set_init_empty (&next_nodes
);
2911 if (str_idx
== top_str
|| (cur_state
&& cur_state
->has_backref
))
2913 if (next_nodes
.nelem
)
2915 err
= expand_bkref_cache (mctx
, &next_nodes
, str_idx
,
2917 if (BE (err
!= REG_NOERROR
, 0))
2919 re_node_set_free (&next_nodes
);
2923 cur_state
= re_acquire_state_context (&err
, dfa
, &next_nodes
, context
);
2924 if (BE (cur_state
== NULL
&& err
!= REG_NOERROR
, 0))
2926 re_node_set_free (&next_nodes
);
2929 mctx
->state_log
[str_idx
] = cur_state
;
2932 for (null_cnt
= 0; str_idx
< last_str
&& null_cnt
<= mctx
->max_mb_elem_len
;)
2934 re_node_set_empty (&next_nodes
);
2935 if (mctx
->state_log
[str_idx
+ 1])
2937 err
= re_node_set_merge (&next_nodes
,
2938 &mctx
->state_log
[str_idx
+ 1]->nodes
);
2939 if (BE (err
!= REG_NOERROR
, 0))
2941 re_node_set_free (&next_nodes
);
2947 err
= check_arrival_add_next_nodes (mctx
, str_idx
,
2948 &cur_state
->non_eps_nodes
,
2950 if (BE (err
!= REG_NOERROR
, 0))
2952 re_node_set_free (&next_nodes
);
2957 if (next_nodes
.nelem
)
2959 err
= check_arrival_expand_ecl (dfa
, &next_nodes
, subexp_num
, type
);
2960 if (BE (err
!= REG_NOERROR
, 0))
2962 re_node_set_free (&next_nodes
);
2965 err
= expand_bkref_cache (mctx
, &next_nodes
, str_idx
,
2967 if (BE (err
!= REG_NOERROR
, 0))
2969 re_node_set_free (&next_nodes
);
2973 context
= re_string_context_at (&mctx
->input
, str_idx
- 1, mctx
->eflags
);
2974 cur_state
= re_acquire_state_context (&err
, dfa
, &next_nodes
, context
);
2975 if (BE (cur_state
== NULL
&& err
!= REG_NOERROR
, 0))
2977 re_node_set_free (&next_nodes
);
2980 mctx
->state_log
[str_idx
] = cur_state
;
2981 null_cnt
= cur_state
== NULL
? null_cnt
+ 1 : 0;
2983 re_node_set_free (&next_nodes
);
2984 cur_nodes
= (mctx
->state_log
[last_str
] == NULL
? NULL
2985 : &mctx
->state_log
[last_str
]->nodes
);
2986 path
->next_idx
= str_idx
;
2989 mctx
->state_log
= backup_state_log
;
2990 mctx
->input
.cur_idx
= backup_cur_idx
;
2992 /* Then check the current node set has the node LAST_NODE. */
2993 if (cur_nodes
!= NULL
&& re_node_set_contains (cur_nodes
, last_node
))
2999 /* Helper functions for check_arrival. */
3001 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3003 TODO: This function is similar to the functions transit_state*(),
3004 however this function has many additional works.
3005 Can't we unify them? */
3007 static reg_errcode_t
3008 __attribute_warn_unused_result__
3009 check_arrival_add_next_nodes (re_match_context_t
*mctx
, Idx str_idx
,
3010 re_node_set
*cur_nodes
, re_node_set
*next_nodes
)
3012 const re_dfa_t
*const dfa
= mctx
->dfa
;
3015 #ifdef RE_ENABLE_I18N
3016 reg_errcode_t err
= REG_NOERROR
;
3018 re_node_set union_set
;
3019 re_node_set_init_empty (&union_set
);
3020 for (cur_idx
= 0; cur_idx
< cur_nodes
->nelem
; ++cur_idx
)
3023 Idx cur_node
= cur_nodes
->elems
[cur_idx
];
3025 re_token_type_t type
= dfa
->nodes
[cur_node
].type
;
3026 assert (!IS_EPSILON_NODE (type
));
3028 #ifdef RE_ENABLE_I18N
3029 /* If the node may accept "multi byte". */
3030 if (dfa
->nodes
[cur_node
].accept_mb
)
3032 naccepted
= check_node_accept_bytes (dfa
, cur_node
, &mctx
->input
,
3036 re_dfastate_t
*dest_state
;
3037 Idx next_node
= dfa
->nexts
[cur_node
];
3038 Idx next_idx
= str_idx
+ naccepted
;
3039 dest_state
= mctx
->state_log
[next_idx
];
3040 re_node_set_empty (&union_set
);
3043 err
= re_node_set_merge (&union_set
, &dest_state
->nodes
);
3044 if (BE (err
!= REG_NOERROR
, 0))
3046 re_node_set_free (&union_set
);
3050 ok
= re_node_set_insert (&union_set
, next_node
);
3053 re_node_set_free (&union_set
);
3056 mctx
->state_log
[next_idx
] = re_acquire_state (&err
, dfa
,
3058 if (BE (mctx
->state_log
[next_idx
] == NULL
3059 && err
!= REG_NOERROR
, 0))
3061 re_node_set_free (&union_set
);
3066 #endif /* RE_ENABLE_I18N */
3068 || check_node_accept (mctx
, dfa
->nodes
+ cur_node
, str_idx
))
3070 ok
= re_node_set_insert (next_nodes
, dfa
->nexts
[cur_node
]);
3073 re_node_set_free (&union_set
);
3078 re_node_set_free (&union_set
);
3082 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3083 CUR_NODES, however exclude the nodes which are:
3084 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3085 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3088 static reg_errcode_t
3089 check_arrival_expand_ecl (const re_dfa_t
*dfa
, re_node_set
*cur_nodes
,
3090 Idx ex_subexp
, int type
)
3093 Idx idx
, outside_node
;
3094 re_node_set new_nodes
;
3096 assert (cur_nodes
->nelem
);
3098 err
= re_node_set_alloc (&new_nodes
, cur_nodes
->nelem
);
3099 if (BE (err
!= REG_NOERROR
, 0))
3101 /* Create a new node set NEW_NODES with the nodes which are epsilon
3102 closures of the node in CUR_NODES. */
3104 for (idx
= 0; idx
< cur_nodes
->nelem
; ++idx
)
3106 Idx cur_node
= cur_nodes
->elems
[idx
];
3107 const re_node_set
*eclosure
= dfa
->eclosures
+ cur_node
;
3108 outside_node
= find_subexp_node (dfa
, eclosure
, ex_subexp
, type
);
3109 if (outside_node
== -1)
3111 /* There are no problematic nodes, just merge them. */
3112 err
= re_node_set_merge (&new_nodes
, eclosure
);
3113 if (BE (err
!= REG_NOERROR
, 0))
3115 re_node_set_free (&new_nodes
);
3121 /* There are problematic nodes, re-calculate incrementally. */
3122 err
= check_arrival_expand_ecl_sub (dfa
, &new_nodes
, cur_node
,
3124 if (BE (err
!= REG_NOERROR
, 0))
3126 re_node_set_free (&new_nodes
);
3131 re_node_set_free (cur_nodes
);
3132 *cur_nodes
= new_nodes
;
3136 /* Helper function for check_arrival_expand_ecl.
3137 Check incrementally the epsilon closure of TARGET, and if it isn't
3138 problematic append it to DST_NODES. */
3140 static reg_errcode_t
3141 __attribute_warn_unused_result__
3142 check_arrival_expand_ecl_sub (const re_dfa_t
*dfa
, re_node_set
*dst_nodes
,
3143 Idx target
, Idx ex_subexp
, int type
)
3146 for (cur_node
= target
; !re_node_set_contains (dst_nodes
, cur_node
);)
3150 if (dfa
->nodes
[cur_node
].type
== type
3151 && dfa
->nodes
[cur_node
].opr
.idx
== ex_subexp
)
3153 if (type
== OP_CLOSE_SUBEXP
)
3155 ok
= re_node_set_insert (dst_nodes
, cur_node
);
3161 ok
= re_node_set_insert (dst_nodes
, cur_node
);
3164 if (dfa
->edests
[cur_node
].nelem
== 0)
3166 if (dfa
->edests
[cur_node
].nelem
== 2)
3169 err
= check_arrival_expand_ecl_sub (dfa
, dst_nodes
,
3170 dfa
->edests
[cur_node
].elems
[1],
3172 if (BE (err
!= REG_NOERROR
, 0))
3175 cur_node
= dfa
->edests
[cur_node
].elems
[0];
3181 /* For all the back references in the current state, calculate the
3182 destination of the back references by the appropriate entry
3183 in MCTX->BKREF_ENTS. */
3185 static reg_errcode_t
3186 __attribute_warn_unused_result__
3187 expand_bkref_cache (re_match_context_t
*mctx
, re_node_set
*cur_nodes
,
3188 Idx cur_str
, Idx subexp_num
, int type
)
3190 const re_dfa_t
*const dfa
= mctx
->dfa
;
3192 Idx cache_idx_start
= search_cur_bkref_entry (mctx
, cur_str
);
3193 struct re_backref_cache_entry
*ent
;
3195 if (cache_idx_start
== -1)
3199 ent
= mctx
->bkref_ents
+ cache_idx_start
;
3202 Idx to_idx
, next_node
;
3204 /* Is this entry ENT is appropriate? */
3205 if (!re_node_set_contains (cur_nodes
, ent
->node
))
3208 to_idx
= cur_str
+ ent
->subexp_to
- ent
->subexp_from
;
3209 /* Calculate the destination of the back reference, and append it
3210 to MCTX->STATE_LOG. */
3211 if (to_idx
== cur_str
)
3213 /* The backreference did epsilon transit, we must re-check all the
3214 node in the current state. */
3215 re_node_set new_dests
;
3216 reg_errcode_t err2
, err3
;
3217 next_node
= dfa
->edests
[ent
->node
].elems
[0];
3218 if (re_node_set_contains (cur_nodes
, next_node
))
3220 err
= re_node_set_init_1 (&new_dests
, next_node
);
3221 err2
= check_arrival_expand_ecl (dfa
, &new_dests
, subexp_num
, type
);
3222 err3
= re_node_set_merge (cur_nodes
, &new_dests
);
3223 re_node_set_free (&new_dests
);
3224 if (BE (err
!= REG_NOERROR
|| err2
!= REG_NOERROR
3225 || err3
!= REG_NOERROR
, 0))
3227 err
= (err
!= REG_NOERROR
? err
3228 : (err2
!= REG_NOERROR
? err2
: err3
));
3231 /* TODO: It is still inefficient... */
3236 re_node_set union_set
;
3237 next_node
= dfa
->nexts
[ent
->node
];
3238 if (mctx
->state_log
[to_idx
])
3241 if (re_node_set_contains (&mctx
->state_log
[to_idx
]->nodes
,
3244 err
= re_node_set_init_copy (&union_set
,
3245 &mctx
->state_log
[to_idx
]->nodes
);
3246 ok
= re_node_set_insert (&union_set
, next_node
);
3247 if (BE (err
!= REG_NOERROR
|| ! ok
, 0))
3249 re_node_set_free (&union_set
);
3250 err
= err
!= REG_NOERROR
? err
: REG_ESPACE
;
3256 err
= re_node_set_init_1 (&union_set
, next_node
);
3257 if (BE (err
!= REG_NOERROR
, 0))
3260 mctx
->state_log
[to_idx
] = re_acquire_state (&err
, dfa
, &union_set
);
3261 re_node_set_free (&union_set
);
3262 if (BE (mctx
->state_log
[to_idx
] == NULL
3263 && err
!= REG_NOERROR
, 0))
3267 while (ent
++->more
);
3271 /* Build transition table for the state.
3272 Return true if successful. */
3275 build_trtable (const re_dfa_t
*dfa
, re_dfastate_t
*state
)
3280 bool need_word_trtable
= false;
3281 bitset_word_t elem
, mask
;
3282 bool dests_node_malloced
= false;
3283 bool dest_states_malloced
= false;
3284 Idx ndests
; /* Number of the destination states from 'state'. */
3285 re_dfastate_t
**trtable
;
3286 re_dfastate_t
**dest_states
= NULL
, **dest_states_word
, **dest_states_nl
;
3287 re_node_set follows
, *dests_node
;
3289 bitset_t acceptable
;
3293 re_node_set dests_node
[SBC_MAX
];
3294 bitset_t dests_ch
[SBC_MAX
];
3297 /* We build DFA states which corresponds to the destination nodes
3298 from 'state'. 'dests_node[i]' represents the nodes which i-th
3299 destination state contains, and 'dests_ch[i]' represents the
3300 characters which i-th destination state accepts. */
3301 if (__libc_use_alloca (sizeof (struct dests_alloc
)))
3302 dests_alloc
= (struct dests_alloc
*) alloca (sizeof (struct dests_alloc
));
3305 dests_alloc
= re_malloc (struct dests_alloc
, 1);
3306 if (BE (dests_alloc
== NULL
, 0))
3308 dests_node_malloced
= true;
3310 dests_node
= dests_alloc
->dests_node
;
3311 dests_ch
= dests_alloc
->dests_ch
;
3313 /* Initialize transition table. */
3314 state
->word_trtable
= state
->trtable
= NULL
;
3316 /* At first, group all nodes belonging to 'state' into several
3318 ndests
= group_nodes_into_DFAstates (dfa
, state
, dests_node
, dests_ch
);
3319 if (BE (ndests
<= 0, 0))
3321 if (dests_node_malloced
)
3322 re_free (dests_alloc
);
3323 /* Return false in case of an error, true otherwise. */
3326 state
->trtable
= (re_dfastate_t
**)
3327 calloc (sizeof (re_dfastate_t
*), SBC_MAX
);
3328 if (BE (state
->trtable
== NULL
, 0))
3335 err
= re_node_set_alloc (&follows
, ndests
+ 1);
3336 if (BE (err
!= REG_NOERROR
, 0))
3339 /* Avoid arithmetic overflow in size calculation. */
3340 if (BE ((((SIZE_MAX
- (sizeof (re_node_set
) + sizeof (bitset_t
)) * SBC_MAX
)
3341 / (3 * sizeof (re_dfastate_t
*)))
3346 if (__libc_use_alloca ((sizeof (re_node_set
) + sizeof (bitset_t
)) * SBC_MAX
3347 + ndests
* 3 * sizeof (re_dfastate_t
*)))
3348 dest_states
= (re_dfastate_t
**)
3349 alloca (ndests
* 3 * sizeof (re_dfastate_t
*));
3352 dest_states
= re_malloc (re_dfastate_t
*, ndests
* 3);
3353 if (BE (dest_states
== NULL
, 0))
3356 if (dest_states_malloced
)
3357 re_free (dest_states
);
3358 re_node_set_free (&follows
);
3359 for (i
= 0; i
< ndests
; ++i
)
3360 re_node_set_free (dests_node
+ i
);
3361 if (dests_node_malloced
)
3362 re_free (dests_alloc
);
3365 dest_states_malloced
= true;
3367 dest_states_word
= dest_states
+ ndests
;
3368 dest_states_nl
= dest_states_word
+ ndests
;
3369 bitset_empty (acceptable
);
3371 /* Then build the states for all destinations. */
3372 for (i
= 0; i
< ndests
; ++i
)
3375 re_node_set_empty (&follows
);
3376 /* Merge the follows of this destination states. */
3377 for (j
= 0; j
< dests_node
[i
].nelem
; ++j
)
3379 next_node
= dfa
->nexts
[dests_node
[i
].elems
[j
]];
3380 if (next_node
!= -1)
3382 err
= re_node_set_merge (&follows
, dfa
->eclosures
+ next_node
);
3383 if (BE (err
!= REG_NOERROR
, 0))
3387 dest_states
[i
] = re_acquire_state_context (&err
, dfa
, &follows
, 0);
3388 if (BE (dest_states
[i
] == NULL
&& err
!= REG_NOERROR
, 0))
3390 /* If the new state has context constraint,
3391 build appropriate states for these contexts. */
3392 if (dest_states
[i
]->has_constraint
)
3394 dest_states_word
[i
] = re_acquire_state_context (&err
, dfa
, &follows
,
3396 if (BE (dest_states_word
[i
] == NULL
&& err
!= REG_NOERROR
, 0))
3399 if (dest_states
[i
] != dest_states_word
[i
] && dfa
->mb_cur_max
> 1)
3400 need_word_trtable
= true;
3402 dest_states_nl
[i
] = re_acquire_state_context (&err
, dfa
, &follows
,
3404 if (BE (dest_states_nl
[i
] == NULL
&& err
!= REG_NOERROR
, 0))
3409 dest_states_word
[i
] = dest_states
[i
];
3410 dest_states_nl
[i
] = dest_states
[i
];
3412 bitset_merge (acceptable
, dests_ch
[i
]);
3415 if (!BE (need_word_trtable
, 0))
3417 /* We don't care about whether the following character is a word
3418 character, or we are in a single-byte character set so we can
3419 discern by looking at the character code: allocate a
3420 256-entry transition table. */
3421 trtable
= state
->trtable
=
3422 (re_dfastate_t
**) calloc (sizeof (re_dfastate_t
*), SBC_MAX
);
3423 if (BE (trtable
== NULL
, 0))
3426 /* For all characters ch...: */
3427 for (i
= 0; i
< BITSET_WORDS
; ++i
)
3428 for (ch
= i
* BITSET_WORD_BITS
, elem
= acceptable
[i
], mask
= 1;
3430 mask
<<= 1, elem
>>= 1, ++ch
)
3431 if (BE (elem
& 1, 0))
3433 /* There must be exactly one destination which accepts
3434 character ch. See group_nodes_into_DFAstates. */
3435 for (j
= 0; (dests_ch
[j
][i
] & mask
) == 0; ++j
)
3438 /* j-th destination accepts the word character ch. */
3439 if (dfa
->word_char
[i
] & mask
)
3440 trtable
[ch
] = dest_states_word
[j
];
3442 trtable
[ch
] = dest_states
[j
];
3447 /* We care about whether the following character is a word
3448 character, and we are in a multi-byte character set: discern
3449 by looking at the character code: build two 256-entry
3450 transition tables, one starting at trtable[0] and one
3451 starting at trtable[SBC_MAX]. */
3452 trtable
= state
->word_trtable
=
3453 (re_dfastate_t
**) calloc (sizeof (re_dfastate_t
*), 2 * SBC_MAX
);
3454 if (BE (trtable
== NULL
, 0))
3457 /* For all characters ch...: */
3458 for (i
= 0; i
< BITSET_WORDS
; ++i
)
3459 for (ch
= i
* BITSET_WORD_BITS
, elem
= acceptable
[i
], mask
= 1;
3461 mask
<<= 1, elem
>>= 1, ++ch
)
3462 if (BE (elem
& 1, 0))
3464 /* There must be exactly one destination which accepts
3465 character ch. See group_nodes_into_DFAstates. */
3466 for (j
= 0; (dests_ch
[j
][i
] & mask
) == 0; ++j
)
3469 /* j-th destination accepts the word character ch. */
3470 trtable
[ch
] = dest_states
[j
];
3471 trtable
[ch
+ SBC_MAX
] = dest_states_word
[j
];
3476 if (bitset_contain (acceptable
, NEWLINE_CHAR
))
3478 /* The current state accepts newline character. */
3479 for (j
= 0; j
< ndests
; ++j
)
3480 if (bitset_contain (dests_ch
[j
], NEWLINE_CHAR
))
3482 /* k-th destination accepts newline character. */
3483 trtable
[NEWLINE_CHAR
] = dest_states_nl
[j
];
3484 if (need_word_trtable
)
3485 trtable
[NEWLINE_CHAR
+ SBC_MAX
] = dest_states_nl
[j
];
3486 /* There must be only one destination which accepts
3487 newline. See group_nodes_into_DFAstates. */
3492 if (dest_states_malloced
)
3493 re_free (dest_states
);
3495 re_node_set_free (&follows
);
3496 for (i
= 0; i
< ndests
; ++i
)
3497 re_node_set_free (dests_node
+ i
);
3499 if (dests_node_malloced
)
3500 re_free (dests_alloc
);
3505 /* Group all nodes belonging to STATE into several destinations.
3506 Then for all destinations, set the nodes belonging to the destination
3507 to DESTS_NODE[i] and set the characters accepted by the destination
3508 to DEST_CH[i]. This function return the number of destinations. */
3511 group_nodes_into_DFAstates (const re_dfa_t
*dfa
, const re_dfastate_t
*state
,
3512 re_node_set
*dests_node
, bitset_t
*dests_ch
)
3517 Idx ndests
; /* Number of the destinations from 'state'. */
3518 bitset_t accepts
; /* Characters a node can accept. */
3519 const re_node_set
*cur_nodes
= &state
->nodes
;
3520 bitset_empty (accepts
);
3523 /* For all the nodes belonging to 'state', */
3524 for (i
= 0; i
< cur_nodes
->nelem
; ++i
)
3526 re_token_t
*node
= &dfa
->nodes
[cur_nodes
->elems
[i
]];
3527 re_token_type_t type
= node
->type
;
3528 unsigned int constraint
= node
->constraint
;
3530 /* Enumerate all single byte character this node can accept. */
3531 if (type
== CHARACTER
)
3532 bitset_set (accepts
, node
->opr
.c
);
3533 else if (type
== SIMPLE_BRACKET
)
3535 bitset_merge (accepts
, node
->opr
.sbcset
);
3537 else if (type
== OP_PERIOD
)
3539 #ifdef RE_ENABLE_I18N
3540 if (dfa
->mb_cur_max
> 1)
3541 bitset_merge (accepts
, dfa
->sb_char
);
3544 bitset_set_all (accepts
);
3545 if (!(dfa
->syntax
& RE_DOT_NEWLINE
))
3546 bitset_clear (accepts
, '\n');
3547 if (dfa
->syntax
& RE_DOT_NOT_NULL
)
3548 bitset_clear (accepts
, '\0');
3550 #ifdef RE_ENABLE_I18N
3551 else if (type
== OP_UTF8_PERIOD
)
3553 if (ASCII_CHARS
% BITSET_WORD_BITS
== 0)
3554 memset (accepts
, -1, ASCII_CHARS
/ CHAR_BIT
);
3556 bitset_merge (accepts
, utf8_sb_map
);
3557 if (!(dfa
->syntax
& RE_DOT_NEWLINE
))
3558 bitset_clear (accepts
, '\n');
3559 if (dfa
->syntax
& RE_DOT_NOT_NULL
)
3560 bitset_clear (accepts
, '\0');
3566 /* Check the 'accepts' and sift the characters which are not
3567 match it the context. */
3570 if (constraint
& NEXT_NEWLINE_CONSTRAINT
)
3572 bool accepts_newline
= bitset_contain (accepts
, NEWLINE_CHAR
);
3573 bitset_empty (accepts
);
3574 if (accepts_newline
)
3575 bitset_set (accepts
, NEWLINE_CHAR
);
3579 if (constraint
& NEXT_ENDBUF_CONSTRAINT
)
3581 bitset_empty (accepts
);
3585 if (constraint
& NEXT_WORD_CONSTRAINT
)
3587 bitset_word_t any_set
= 0;
3588 if (type
== CHARACTER
&& !node
->word_char
)
3590 bitset_empty (accepts
);
3593 #ifdef RE_ENABLE_I18N
3594 if (dfa
->mb_cur_max
> 1)
3595 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3596 any_set
|= (accepts
[j
] &= (dfa
->word_char
[j
] | ~dfa
->sb_char
[j
]));
3599 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3600 any_set
|= (accepts
[j
] &= dfa
->word_char
[j
]);
3604 if (constraint
& NEXT_NOTWORD_CONSTRAINT
)
3606 bitset_word_t any_set
= 0;
3607 if (type
== CHARACTER
&& node
->word_char
)
3609 bitset_empty (accepts
);
3612 #ifdef RE_ENABLE_I18N
3613 if (dfa
->mb_cur_max
> 1)
3614 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3615 any_set
|= (accepts
[j
] &= ~(dfa
->word_char
[j
] & dfa
->sb_char
[j
]));
3618 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3619 any_set
|= (accepts
[j
] &= ~dfa
->word_char
[j
]);
3625 /* Then divide 'accepts' into DFA states, or create a new
3626 state. Above, we make sure that accepts is not empty. */
3627 for (j
= 0; j
< ndests
; ++j
)
3629 bitset_t intersec
; /* Intersection sets, see below. */
3631 /* Flags, see below. */
3632 bitset_word_t has_intersec
, not_subset
, not_consumed
;
3634 /* Optimization, skip if this state doesn't accept the character. */
3635 if (type
== CHARACTER
&& !bitset_contain (dests_ch
[j
], node
->opr
.c
))
3638 /* Enumerate the intersection set of this state and 'accepts'. */
3640 for (k
= 0; k
< BITSET_WORDS
; ++k
)
3641 has_intersec
|= intersec
[k
] = accepts
[k
] & dests_ch
[j
][k
];
3642 /* And skip if the intersection set is empty. */
3646 /* Then check if this state is a subset of 'accepts'. */
3647 not_subset
= not_consumed
= 0;
3648 for (k
= 0; k
< BITSET_WORDS
; ++k
)
3650 not_subset
|= remains
[k
] = ~accepts
[k
] & dests_ch
[j
][k
];
3651 not_consumed
|= accepts
[k
] = accepts
[k
] & ~dests_ch
[j
][k
];
3654 /* If this state isn't a subset of 'accepts', create a
3655 new group state, which has the 'remains'. */
3658 bitset_copy (dests_ch
[ndests
], remains
);
3659 bitset_copy (dests_ch
[j
], intersec
);
3660 err
= re_node_set_init_copy (dests_node
+ ndests
, &dests_node
[j
]);
3661 if (BE (err
!= REG_NOERROR
, 0))
3666 /* Put the position in the current group. */
3667 ok
= re_node_set_insert (&dests_node
[j
], cur_nodes
->elems
[i
]);
3671 /* If all characters are consumed, go to next node. */
3675 /* Some characters remain, create a new group. */
3678 bitset_copy (dests_ch
[ndests
], accepts
);
3679 err
= re_node_set_init_1 (dests_node
+ ndests
, cur_nodes
->elems
[i
]);
3680 if (BE (err
!= REG_NOERROR
, 0))
3683 bitset_empty (accepts
);
3688 for (j
= 0; j
< ndests
; ++j
)
3689 re_node_set_free (dests_node
+ j
);
3693 #ifdef RE_ENABLE_I18N
3694 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3695 Return the number of the bytes the node accepts.
3696 STR_IDX is the current index of the input string.
3698 This function handles the nodes which can accept one character, or
3699 one collating element like '.', '[a-z]', opposite to the other nodes
3700 can only accept one byte. */
3703 # include <locale/weight.h>
3707 check_node_accept_bytes (const re_dfa_t
*dfa
, Idx node_idx
,
3708 const re_string_t
*input
, Idx str_idx
)
3710 const re_token_t
*node
= dfa
->nodes
+ node_idx
;
3711 int char_len
, elem_len
;
3714 if (BE (node
->type
== OP_UTF8_PERIOD
, 0))
3716 unsigned char c
= re_string_byte_at (input
, str_idx
), d
;
3717 if (BE (c
< 0xc2, 1))
3720 if (str_idx
+ 2 > input
->len
)
3723 d
= re_string_byte_at (input
, str_idx
+ 1);
3725 return (d
< 0x80 || d
> 0xbf) ? 0 : 2;
3729 if (c
== 0xe0 && d
< 0xa0)
3735 if (c
== 0xf0 && d
< 0x90)
3741 if (c
== 0xf8 && d
< 0x88)
3747 if (c
== 0xfc && d
< 0x84)
3753 if (str_idx
+ char_len
> input
->len
)
3756 for (i
= 1; i
< char_len
; ++i
)
3758 d
= re_string_byte_at (input
, str_idx
+ i
);
3759 if (d
< 0x80 || d
> 0xbf)
3765 char_len
= re_string_char_size_at (input
, str_idx
);
3766 if (node
->type
== OP_PERIOD
)
3770 /* FIXME: I don't think this if is needed, as both '\n'
3771 and '\0' are char_len == 1. */
3772 /* '.' accepts any one character except the following two cases. */
3773 if ((!(dfa
->syntax
& RE_DOT_NEWLINE
) &&
3774 re_string_byte_at (input
, str_idx
) == '\n') ||
3775 ((dfa
->syntax
& RE_DOT_NOT_NULL
) &&
3776 re_string_byte_at (input
, str_idx
) == '\0'))
3781 elem_len
= re_string_elem_size_at (input
, str_idx
);
3782 if ((elem_len
<= 1 && char_len
<= 1) || char_len
== 0)
3785 if (node
->type
== COMPLEX_BRACKET
)
3787 const re_charset_t
*cset
= node
->opr
.mbcset
;
3789 const unsigned char *pin
3790 = ((const unsigned char *) re_string_get_buffer (input
) + str_idx
);
3795 wchar_t wc
= ((cset
->nranges
|| cset
->nchar_classes
|| cset
->nmbchars
)
3796 ? re_string_wchar_at (input
, str_idx
) : 0);
3798 /* match with multibyte character? */
3799 for (i
= 0; i
< cset
->nmbchars
; ++i
)
3800 if (wc
== cset
->mbchars
[i
])
3802 match_len
= char_len
;
3803 goto check_node_accept_bytes_match
;
3805 /* match with character_class? */
3806 for (i
= 0; i
< cset
->nchar_classes
; ++i
)
3808 wctype_t wt
= cset
->char_classes
[i
];
3809 if (__iswctype (wc
, wt
))
3811 match_len
= char_len
;
3812 goto check_node_accept_bytes_match
;
3817 nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
3820 unsigned int in_collseq
= 0;
3821 const int32_t *table
, *indirect
;
3822 const unsigned char *weights
, *extra
;
3823 const char *collseqwc
;
3825 /* match with collating_symbol? */
3826 if (cset
->ncoll_syms
)
3827 extra
= (const unsigned char *)
3828 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
);
3829 for (i
= 0; i
< cset
->ncoll_syms
; ++i
)
3831 const unsigned char *coll_sym
= extra
+ cset
->coll_syms
[i
];
3832 /* Compare the length of input collating element and
3833 the length of current collating element. */
3834 if (*coll_sym
!= elem_len
)
3836 /* Compare each bytes. */
3837 for (j
= 0; j
< *coll_sym
; j
++)
3838 if (pin
[j
] != coll_sym
[1 + j
])
3842 /* Match if every bytes is equal. */
3844 goto check_node_accept_bytes_match
;
3850 if (elem_len
<= char_len
)
3852 collseqwc
= _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQWC
);
3853 in_collseq
= __collseq_table_lookup (collseqwc
, wc
);
3856 in_collseq
= find_collation_sequence_value (pin
, elem_len
);
3858 /* match with range expression? */
3859 /* FIXME: Implement rational ranges here, too. */
3860 for (i
= 0; i
< cset
->nranges
; ++i
)
3861 if (cset
->range_starts
[i
] <= in_collseq
3862 && in_collseq
<= cset
->range_ends
[i
])
3864 match_len
= elem_len
;
3865 goto check_node_accept_bytes_match
;
3868 /* match with equivalence_class? */
3869 if (cset
->nequiv_classes
)
3871 const unsigned char *cp
= pin
;
3872 table
= (const int32_t *)
3873 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
3874 weights
= (const unsigned char *)
3875 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTMB
);
3876 extra
= (const unsigned char *)
3877 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAMB
);
3878 indirect
= (const int32_t *)
3879 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTMB
);
3880 int32_t idx
= findidx (table
, indirect
, extra
, &cp
, elem_len
);
3881 int32_t rule
= idx
>> 24;
3885 size_t weight_len
= weights
[idx
];
3886 for (i
= 0; i
< cset
->nequiv_classes
; ++i
)
3888 int32_t equiv_class_idx
= cset
->equiv_classes
[i
];
3889 int32_t equiv_class_rule
= equiv_class_idx
>> 24;
3890 equiv_class_idx
&= 0xffffff;
3891 if (weights
[equiv_class_idx
] == weight_len
3892 && equiv_class_rule
== rule
3893 && memcmp (weights
+ idx
+ 1,
3894 weights
+ equiv_class_idx
+ 1,
3897 match_len
= elem_len
;
3898 goto check_node_accept_bytes_match
;
3907 /* match with range expression? */
3908 for (i
= 0; i
< cset
->nranges
; ++i
)
3910 if (cset
->range_starts
[i
] <= wc
&& wc
<= cset
->range_ends
[i
])
3912 match_len
= char_len
;
3913 goto check_node_accept_bytes_match
;
3917 check_node_accept_bytes_match
:
3918 if (!cset
->non_match
)
3925 return (elem_len
> char_len
) ? elem_len
: char_len
;
3933 find_collation_sequence_value (const unsigned char *mbs
, size_t mbs_len
)
3935 uint32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
3940 /* No valid character. Match it as a single byte character. */
3941 const unsigned char *collseq
= (const unsigned char *)
3942 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQMB
);
3943 return collseq
[mbs
[0]];
3950 const unsigned char *extra
= (const unsigned char *)
3951 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
);
3952 int32_t extrasize
= (const unsigned char *)
3953 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
+ 1) - extra
;
3955 for (idx
= 0; idx
< extrasize
;)
3959 int32_t elem_mbs_len
;
3960 /* Skip the name of collating element name. */
3961 idx
= idx
+ extra
[idx
] + 1;
3962 elem_mbs_len
= extra
[idx
++];
3963 if (mbs_len
== elem_mbs_len
)
3965 for (mbs_cnt
= 0; mbs_cnt
< elem_mbs_len
; ++mbs_cnt
)
3966 if (extra
[idx
+ mbs_cnt
] != mbs
[mbs_cnt
])
3968 if (mbs_cnt
== elem_mbs_len
)
3969 /* Found the entry. */
3972 /* Skip the byte sequence of the collating element. */
3973 idx
+= elem_mbs_len
;
3974 /* Adjust for the alignment. */
3975 idx
= (idx
+ 3) & ~3;
3976 /* Skip the collation sequence value. */
3977 idx
+= sizeof (uint32_t);
3978 /* Skip the wide char sequence of the collating element. */
3979 idx
= idx
+ sizeof (uint32_t) * (*(int32_t *) (extra
+ idx
) + 1);
3980 /* If we found the entry, return the sequence value. */
3982 return *(uint32_t *) (extra
+ idx
);
3983 /* Skip the collation sequence value. */
3984 idx
+= sizeof (uint32_t);
3990 #endif /* RE_ENABLE_I18N */
3992 /* Check whether the node accepts the byte which is IDX-th
3993 byte of the INPUT. */
3996 check_node_accept (const re_match_context_t
*mctx
, const re_token_t
*node
,
4000 ch
= re_string_byte_at (&mctx
->input
, idx
);
4004 if (node
->opr
.c
!= ch
)
4008 case SIMPLE_BRACKET
:
4009 if (!bitset_contain (node
->opr
.sbcset
, ch
))
4013 #ifdef RE_ENABLE_I18N
4014 case OP_UTF8_PERIOD
:
4015 if (ch
>= ASCII_CHARS
)
4020 if ((ch
== '\n' && !(mctx
->dfa
->syntax
& RE_DOT_NEWLINE
))
4021 || (ch
== '\0' && (mctx
->dfa
->syntax
& RE_DOT_NOT_NULL
)))
4029 if (node
->constraint
)
4031 /* The node has constraints. Check whether the current context
4032 satisfies the constraints. */
4033 unsigned int context
= re_string_context_at (&mctx
->input
, idx
,
4035 if (NOT_SATISFY_NEXT_CONSTRAINT (node
->constraint
, context
))
4042 /* Extend the buffers, if the buffers have run out. */
4044 static reg_errcode_t
4045 __attribute_warn_unused_result__
4046 extend_buffers (re_match_context_t
*mctx
, int min_len
)
4049 re_string_t
*pstr
= &mctx
->input
;
4051 /* Avoid overflow. */
4052 if (BE (MIN (IDX_MAX
, SIZE_MAX
/ sizeof (re_dfastate_t
*)) / 2
4053 <= pstr
->bufs_len
, 0))
4056 /* Double the lengths of the buffers, but allocate at least MIN_LEN. */
4057 ret
= re_string_realloc_buffers (pstr
,
4059 MIN (pstr
->len
, pstr
->bufs_len
* 2)));
4060 if (BE (ret
!= REG_NOERROR
, 0))
4063 if (mctx
->state_log
!= NULL
)
4065 /* And double the length of state_log. */
4066 /* XXX We have no indication of the size of this buffer. If this
4067 allocation fail we have no indication that the state_log array
4068 does not have the right size. */
4069 re_dfastate_t
**new_array
= re_realloc (mctx
->state_log
, re_dfastate_t
*,
4070 pstr
->bufs_len
+ 1);
4071 if (BE (new_array
== NULL
, 0))
4073 mctx
->state_log
= new_array
;
4076 /* Then reconstruct the buffers. */
4079 #ifdef RE_ENABLE_I18N
4080 if (pstr
->mb_cur_max
> 1)
4082 ret
= build_wcs_upper_buffer (pstr
);
4083 if (BE (ret
!= REG_NOERROR
, 0))
4087 #endif /* RE_ENABLE_I18N */
4088 build_upper_buffer (pstr
);
4092 #ifdef RE_ENABLE_I18N
4093 if (pstr
->mb_cur_max
> 1)
4094 build_wcs_buffer (pstr
);
4096 #endif /* RE_ENABLE_I18N */
4098 if (pstr
->trans
!= NULL
)
4099 re_string_translate_buffer (pstr
);
4106 /* Functions for matching context. */
4108 /* Initialize MCTX. */
4110 static reg_errcode_t
4111 __attribute_warn_unused_result__
4112 match_ctx_init (re_match_context_t
*mctx
, int eflags
, Idx n
)
4114 mctx
->eflags
= eflags
;
4115 mctx
->match_last
= -1;
4118 /* Avoid overflow. */
4119 size_t max_object_size
=
4120 MAX (sizeof (struct re_backref_cache_entry
),
4121 sizeof (re_sub_match_top_t
*));
4122 if (BE (MIN (IDX_MAX
, SIZE_MAX
/ max_object_size
) < n
, 0))
4125 mctx
->bkref_ents
= re_malloc (struct re_backref_cache_entry
, n
);
4126 mctx
->sub_tops
= re_malloc (re_sub_match_top_t
*, n
);
4127 if (BE (mctx
->bkref_ents
== NULL
|| mctx
->sub_tops
== NULL
, 0))
4130 /* Already zero-ed by the caller.
4132 mctx->bkref_ents = NULL;
4133 mctx->nbkref_ents = 0;
4134 mctx->nsub_tops = 0; */
4135 mctx
->abkref_ents
= n
;
4136 mctx
->max_mb_elem_len
= 1;
4137 mctx
->asub_tops
= n
;
4141 /* Clean the entries which depend on the current input in MCTX.
4142 This function must be invoked when the matcher changes the start index
4143 of the input, or changes the input string. */
4146 match_ctx_clean (re_match_context_t
*mctx
)
4149 for (st_idx
= 0; st_idx
< mctx
->nsub_tops
; ++st_idx
)
4152 re_sub_match_top_t
*top
= mctx
->sub_tops
[st_idx
];
4153 for (sl_idx
= 0; sl_idx
< top
->nlasts
; ++sl_idx
)
4155 re_sub_match_last_t
*last
= top
->lasts
[sl_idx
];
4156 re_free (last
->path
.array
);
4159 re_free (top
->lasts
);
4162 re_free (top
->path
->array
);
4163 re_free (top
->path
);
4168 mctx
->nsub_tops
= 0;
4169 mctx
->nbkref_ents
= 0;
4172 /* Free all the memory associated with MCTX. */
4175 match_ctx_free (re_match_context_t
*mctx
)
4177 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4178 match_ctx_clean (mctx
);
4179 re_free (mctx
->sub_tops
);
4180 re_free (mctx
->bkref_ents
);
4183 /* Add a new backreference entry to MCTX.
4184 Note that we assume that caller never call this function with duplicate
4185 entry, and call with STR_IDX which isn't smaller than any existing entry.
4188 static reg_errcode_t
4189 __attribute_warn_unused_result__
4190 match_ctx_add_entry (re_match_context_t
*mctx
, Idx node
, Idx str_idx
, Idx from
,
4193 if (mctx
->nbkref_ents
>= mctx
->abkref_ents
)
4195 struct re_backref_cache_entry
* new_entry
;
4196 new_entry
= re_realloc (mctx
->bkref_ents
, struct re_backref_cache_entry
,
4197 mctx
->abkref_ents
* 2);
4198 if (BE (new_entry
== NULL
, 0))
4200 re_free (mctx
->bkref_ents
);
4203 mctx
->bkref_ents
= new_entry
;
4204 memset (mctx
->bkref_ents
+ mctx
->nbkref_ents
, '\0',
4205 sizeof (struct re_backref_cache_entry
) * mctx
->abkref_ents
);
4206 mctx
->abkref_ents
*= 2;
4208 if (mctx
->nbkref_ents
> 0
4209 && mctx
->bkref_ents
[mctx
->nbkref_ents
- 1].str_idx
== str_idx
)
4210 mctx
->bkref_ents
[mctx
->nbkref_ents
- 1].more
= 1;
4212 mctx
->bkref_ents
[mctx
->nbkref_ents
].node
= node
;
4213 mctx
->bkref_ents
[mctx
->nbkref_ents
].str_idx
= str_idx
;
4214 mctx
->bkref_ents
[mctx
->nbkref_ents
].subexp_from
= from
;
4215 mctx
->bkref_ents
[mctx
->nbkref_ents
].subexp_to
= to
;
4217 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4218 If bit N is clear, means that this entry won't epsilon-transition to
4219 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4220 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4223 A backreference does not epsilon-transition unless it is empty, so set
4224 to all zeros if FROM != TO. */
4225 mctx
->bkref_ents
[mctx
->nbkref_ents
].eps_reachable_subexps_map
4226 = (from
== to
? -1 : 0);
4228 mctx
->bkref_ents
[mctx
->nbkref_ents
++].more
= 0;
4229 if (mctx
->max_mb_elem_len
< to
- from
)
4230 mctx
->max_mb_elem_len
= to
- from
;
4234 /* Return the first entry with the same str_idx, or -1 if none is
4235 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4238 search_cur_bkref_entry (const re_match_context_t
*mctx
, Idx str_idx
)
4240 Idx left
, right
, mid
, last
;
4241 last
= right
= mctx
->nbkref_ents
;
4242 for (left
= 0; left
< right
;)
4244 mid
= (left
+ right
) / 2;
4245 if (mctx
->bkref_ents
[mid
].str_idx
< str_idx
)
4250 if (left
< last
&& mctx
->bkref_ents
[left
].str_idx
== str_idx
)
4256 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4259 static reg_errcode_t
4260 __attribute_warn_unused_result__
4261 match_ctx_add_subtop (re_match_context_t
*mctx
, Idx node
, Idx str_idx
)
4264 assert (mctx
->sub_tops
!= NULL
);
4265 assert (mctx
->asub_tops
> 0);
4267 if (BE (mctx
->nsub_tops
== mctx
->asub_tops
, 0))
4269 Idx new_asub_tops
= mctx
->asub_tops
* 2;
4270 re_sub_match_top_t
**new_array
= re_realloc (mctx
->sub_tops
,
4271 re_sub_match_top_t
*,
4273 if (BE (new_array
== NULL
, 0))
4275 mctx
->sub_tops
= new_array
;
4276 mctx
->asub_tops
= new_asub_tops
;
4278 mctx
->sub_tops
[mctx
->nsub_tops
] = calloc (1, sizeof (re_sub_match_top_t
));
4279 if (BE (mctx
->sub_tops
[mctx
->nsub_tops
] == NULL
, 0))
4281 mctx
->sub_tops
[mctx
->nsub_tops
]->node
= node
;
4282 mctx
->sub_tops
[mctx
->nsub_tops
++]->str_idx
= str_idx
;
4286 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4287 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4289 static re_sub_match_last_t
*
4290 match_ctx_add_sublast (re_sub_match_top_t
*subtop
, Idx node
, Idx str_idx
)
4292 re_sub_match_last_t
*new_entry
;
4293 if (BE (subtop
->nlasts
== subtop
->alasts
, 0))
4295 Idx new_alasts
= 2 * subtop
->alasts
+ 1;
4296 re_sub_match_last_t
**new_array
= re_realloc (subtop
->lasts
,
4297 re_sub_match_last_t
*,
4299 if (BE (new_array
== NULL
, 0))
4301 subtop
->lasts
= new_array
;
4302 subtop
->alasts
= new_alasts
;
4304 new_entry
= calloc (1, sizeof (re_sub_match_last_t
));
4305 if (BE (new_entry
!= NULL
, 1))
4307 subtop
->lasts
[subtop
->nlasts
] = new_entry
;
4308 new_entry
->node
= node
;
4309 new_entry
->str_idx
= str_idx
;
4316 sift_ctx_init (re_sift_context_t
*sctx
, re_dfastate_t
**sifted_sts
,
4317 re_dfastate_t
**limited_sts
, Idx last_node
, Idx last_str_idx
)
4319 sctx
->sifted_states
= sifted_sts
;
4320 sctx
->limited_states
= limited_sts
;
4321 sctx
->last_node
= last_node
;
4322 sctx
->last_str_idx
= last_str_idx
;
4323 re_node_set_init_empty (&sctx
->limits
);