1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
22 static reg_errcode_t
match_ctx_init (re_match_context_t
*cache
, int eflags
,
23 int n
) internal_function
;
24 static void match_ctx_clean (re_match_context_t
*mctx
) internal_function
;
25 static void match_ctx_free (re_match_context_t
*cache
) internal_function
;
26 static reg_errcode_t
match_ctx_add_entry (re_match_context_t
*cache
, int node
,
27 int str_idx
, int from
, int to
)
29 static int search_cur_bkref_entry (const re_match_context_t
*mctx
, int str_idx
)
31 static reg_errcode_t
match_ctx_add_subtop (re_match_context_t
*mctx
, int node
,
32 int str_idx
) internal_function
;
33 static re_sub_match_last_t
* match_ctx_add_sublast (re_sub_match_top_t
*subtop
,
34 int node
, int str_idx
)
36 static void sift_ctx_init (re_sift_context_t
*sctx
, re_dfastate_t
**sifted_sts
,
37 re_dfastate_t
**limited_sts
, int last_node
,
40 static reg_errcode_t
re_search_internal (const regex_t
*preg
,
41 const char *string
, int length
,
42 int start
, int range
, int stop
,
43 size_t nmatch
, regmatch_t pmatch
[],
44 int eflags
) internal_function
;
45 static int re_search_2_stub (struct re_pattern_buffer
*bufp
,
46 const char *string1
, int length1
,
47 const char *string2
, int length2
,
48 int start
, int range
, struct re_registers
*regs
,
49 int stop
, int ret_len
) internal_function
;
50 static int re_search_stub (struct re_pattern_buffer
*bufp
,
51 const char *string
, int length
, int start
,
52 int range
, int stop
, struct re_registers
*regs
,
53 int ret_len
) internal_function
;
54 static unsigned re_copy_regs (struct re_registers
*regs
, regmatch_t
*pmatch
,
55 int nregs
, int regs_allocated
) internal_function
;
56 static reg_errcode_t
prune_impossible_nodes (re_match_context_t
*mctx
)
58 static int check_matching (re_match_context_t
*mctx
, int fl_longest_match
,
59 int *p_match_first
) internal_function
;
60 static int check_halt_state_context (const re_match_context_t
*mctx
,
61 const re_dfastate_t
*state
, int idx
)
63 static void update_regs (const re_dfa_t
*dfa
, regmatch_t
*pmatch
,
64 regmatch_t
*prev_idx_match
, int cur_node
,
65 int cur_idx
, int nmatch
) internal_function
;
66 static reg_errcode_t
push_fail_stack (struct re_fail_stack_t
*fs
,
67 int str_idx
, int dest_node
, int nregs
,
69 re_node_set
*eps_via_nodes
)
71 static reg_errcode_t
set_regs (const regex_t
*preg
,
72 const re_match_context_t
*mctx
,
73 size_t nmatch
, regmatch_t
*pmatch
,
74 int fl_backtrack
) internal_function
;
75 static reg_errcode_t
free_fail_stack_return (struct re_fail_stack_t
*fs
)
79 static int sift_states_iter_mb (const re_match_context_t
*mctx
,
80 re_sift_context_t
*sctx
,
81 int node_idx
, int str_idx
, int max_str_idx
)
83 #endif /* RE_ENABLE_I18N */
84 static reg_errcode_t
sift_states_backward (const re_match_context_t
*mctx
,
85 re_sift_context_t
*sctx
)
87 static reg_errcode_t
build_sifted_states (const re_match_context_t
*mctx
,
88 re_sift_context_t
*sctx
, int str_idx
,
89 re_node_set
*cur_dest
)
91 static reg_errcode_t
update_cur_sifted_state (const re_match_context_t
*mctx
,
92 re_sift_context_t
*sctx
,
94 re_node_set
*dest_nodes
)
96 static reg_errcode_t
add_epsilon_src_nodes (const re_dfa_t
*dfa
,
97 re_node_set
*dest_nodes
,
98 const re_node_set
*candidates
)
100 static int check_dst_limits (const re_match_context_t
*mctx
,
102 int dst_node
, int dst_idx
, int src_node
,
103 int src_idx
) internal_function
;
104 static int check_dst_limits_calc_pos_1 (const re_match_context_t
*mctx
,
105 int boundaries
, int subexp_idx
,
106 int from_node
, int bkref_idx
)
108 static int check_dst_limits_calc_pos (const re_match_context_t
*mctx
,
109 int limit
, int subexp_idx
,
110 int node
, int str_idx
,
111 int bkref_idx
) internal_function
;
112 static reg_errcode_t
check_subexp_limits (const re_dfa_t
*dfa
,
113 re_node_set
*dest_nodes
,
114 const re_node_set
*candidates
,
116 struct re_backref_cache_entry
*bkref_ents
,
117 int str_idx
) internal_function
;
118 static reg_errcode_t
sift_states_bkref (const re_match_context_t
*mctx
,
119 re_sift_context_t
*sctx
,
120 int str_idx
, const re_node_set
*candidates
)
122 static reg_errcode_t
merge_state_array (const re_dfa_t
*dfa
,
124 re_dfastate_t
**src
, int num
)
126 static re_dfastate_t
*find_recover_state (reg_errcode_t
*err
,
127 re_match_context_t
*mctx
) internal_function
;
128 static re_dfastate_t
*transit_state (reg_errcode_t
*err
,
129 re_match_context_t
*mctx
,
130 re_dfastate_t
*state
) internal_function
;
131 static re_dfastate_t
*merge_state_with_log (reg_errcode_t
*err
,
132 re_match_context_t
*mctx
,
133 re_dfastate_t
*next_state
)
135 static reg_errcode_t
check_subexp_matching_top (re_match_context_t
*mctx
,
136 re_node_set
*cur_nodes
,
137 int str_idx
) internal_function
;
139 static re_dfastate_t
*transit_state_sb (reg_errcode_t
*err
,
140 re_match_context_t
*mctx
,
141 re_dfastate_t
*pstate
)
144 #ifdef RE_ENABLE_I18N
145 static reg_errcode_t
transit_state_mb (re_match_context_t
*mctx
,
146 re_dfastate_t
*pstate
)
148 #endif /* RE_ENABLE_I18N */
149 static reg_errcode_t
transit_state_bkref (re_match_context_t
*mctx
,
150 const re_node_set
*nodes
)
152 static reg_errcode_t
get_subexp (re_match_context_t
*mctx
,
153 int bkref_node
, int bkref_str_idx
)
155 static reg_errcode_t
get_subexp_sub (re_match_context_t
*mctx
,
156 const re_sub_match_top_t
*sub_top
,
157 re_sub_match_last_t
*sub_last
,
158 int bkref_node
, int bkref_str
)
160 static int find_subexp_node (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
161 int subexp_idx
, int type
) internal_function
;
162 static reg_errcode_t
check_arrival (re_match_context_t
*mctx
,
163 state_array_t
*path
, int top_node
,
164 int top_str
, int last_node
, int last_str
,
165 int type
) internal_function
;
166 static reg_errcode_t
check_arrival_add_next_nodes (re_match_context_t
*mctx
,
168 re_node_set
*cur_nodes
,
169 re_node_set
*next_nodes
)
171 static reg_errcode_t
check_arrival_expand_ecl (const re_dfa_t
*dfa
,
172 re_node_set
*cur_nodes
,
173 int ex_subexp
, int type
)
175 static reg_errcode_t
check_arrival_expand_ecl_sub (const re_dfa_t
*dfa
,
176 re_node_set
*dst_nodes
,
177 int target
, int ex_subexp
,
178 int type
) internal_function
;
179 static reg_errcode_t
expand_bkref_cache (re_match_context_t
*mctx
,
180 re_node_set
*cur_nodes
, int cur_str
,
181 int subexp_num
, int type
)
183 static int build_trtable (const re_dfa_t
*dfa
,
184 re_dfastate_t
*state
) internal_function
;
185 #ifdef RE_ENABLE_I18N
186 static int check_node_accept_bytes (const re_dfa_t
*dfa
, int node_idx
,
187 const re_string_t
*input
, int idx
)
190 static unsigned int find_collation_sequence_value (const unsigned char *mbs
,
194 #endif /* RE_ENABLE_I18N */
195 static int group_nodes_into_DFAstates (const re_dfa_t
*dfa
,
196 const re_dfastate_t
*state
,
197 re_node_set
*states_node
,
198 bitset_t
*states_ch
) internal_function
;
199 static int check_node_accept (const re_match_context_t
*mctx
,
200 const re_token_t
*node
, int idx
)
202 static reg_errcode_t
extend_buffers (re_match_context_t
*mctx
, int min_len
)
205 /* Entry point for POSIX code. */
207 /* regexec searches for a given pattern, specified by PREG, in the
210 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
211 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
212 least NMATCH elements, and we set them to the offsets of the
213 corresponding matched substrings.
215 EFLAGS specifies `execution flags' which affect matching: if
216 REG_NOTBOL is set, then ^ does not match at the beginning of the
217 string; if REG_NOTEOL is set, then $ does not match at the end.
219 We return 0 if we find a match and REG_NOMATCH if not. */
222 regexec (preg
, string
, nmatch
, pmatch
, eflags
)
223 const regex_t
*__restrict preg
;
224 const char *__restrict string
;
231 re_dfa_t
*dfa
= (re_dfa_t
*) preg
->buffer
;
233 if (eflags
& ~(REG_NOTBOL
| REG_NOTEOL
| REG_STARTEND
))
236 if (eflags
& REG_STARTEND
)
238 start
= pmatch
[0].rm_so
;
239 length
= pmatch
[0].rm_eo
;
244 length
= strlen (string
);
247 __libc_lock_lock (dfa
->lock
);
249 err
= re_search_internal (preg
, string
, length
, start
, length
- start
,
250 length
, 0, NULL
, eflags
);
252 err
= re_search_internal (preg
, string
, length
, start
, length
- start
,
253 length
, nmatch
, pmatch
, eflags
);
254 __libc_lock_unlock (dfa
->lock
);
255 return err
!= REG_NOERROR
;
259 # include <shlib-compat.h>
260 versioned_symbol (libc
, __regexec
, regexec
, GLIBC_2_3_4
);
262 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
263 __typeof__ (__regexec
) __compat_regexec
;
266 attribute_compat_text_section
267 __compat_regexec (const regex_t
*__restrict preg
,
268 const char *__restrict string
, size_t nmatch
,
269 regmatch_t pmatch
[], int eflags
)
271 return regexec (preg
, string
, nmatch
, pmatch
,
272 eflags
& (REG_NOTBOL
| REG_NOTEOL
));
274 compat_symbol (libc
, __compat_regexec
, regexec
, GLIBC_2_0
);
278 /* Entry points for GNU code. */
280 /* re_match, re_search, re_match_2, re_search_2
282 The former two functions operate on STRING with length LENGTH,
283 while the later two operate on concatenation of STRING1 and STRING2
284 with lengths LENGTH1 and LENGTH2, respectively.
286 re_match() matches the compiled pattern in BUFP against the string,
287 starting at index START.
289 re_search() first tries matching at index START, then it tries to match
290 starting from index START + 1, and so on. The last start position tried
291 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
294 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
295 the first STOP characters of the concatenation of the strings should be
298 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
299 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
300 computed relative to the concatenation, not relative to the individual
303 On success, re_match* functions return the length of the match, re_search*
304 return the position of the start of the match. Return value -1 means no
305 match was found and -2 indicates an internal error. */
308 re_match (bufp
, string
, length
, start
, regs
)
309 struct re_pattern_buffer
*bufp
;
312 struct re_registers
*regs
;
314 return re_search_stub (bufp
, string
, length
, start
, 0, length
, regs
, 1);
317 weak_alias (__re_match
, re_match
)
321 re_search (bufp
, string
, length
, start
, range
, regs
)
322 struct re_pattern_buffer
*bufp
;
324 int length
, start
, range
;
325 struct re_registers
*regs
;
327 return re_search_stub (bufp
, string
, length
, start
, range
, length
, regs
, 0);
330 weak_alias (__re_search
, re_search
)
334 re_match_2 (bufp
, string1
, length1
, string2
, length2
, start
, regs
, stop
)
335 struct re_pattern_buffer
*bufp
;
336 const char *string1
, *string2
;
337 int length1
, length2
, start
, stop
;
338 struct re_registers
*regs
;
340 return re_search_2_stub (bufp
, string1
, length1
, string2
, length2
,
341 start
, 0, regs
, stop
, 1);
344 weak_alias (__re_match_2
, re_match_2
)
348 re_search_2 (bufp
, string1
, length1
, string2
, length2
, start
, range
, regs
, stop
)
349 struct re_pattern_buffer
*bufp
;
350 const char *string1
, *string2
;
351 int length1
, length2
, start
, range
, stop
;
352 struct re_registers
*regs
;
354 return re_search_2_stub (bufp
, string1
, length1
, string2
, length2
,
355 start
, range
, regs
, stop
, 0);
358 weak_alias (__re_search_2
, re_search_2
)
362 re_search_2_stub (bufp
, string1
, length1
, string2
, length2
, start
, range
, regs
,
364 struct re_pattern_buffer
*bufp
;
365 const char *string1
, *string2
;
366 int length1
, length2
, start
, range
, stop
, ret_len
;
367 struct re_registers
*regs
;
371 int len
= length1
+ length2
;
374 if (BE (length1
< 0 || length2
< 0 || stop
< 0 || len
< length1
, 0))
377 /* Concatenate the strings. */
381 s
= re_malloc (char, len
);
383 if (BE (s
== NULL
, 0))
386 memcpy (__mempcpy (s
, string1
, length1
), string2
, length2
);
388 memcpy (s
, string1
, length1
);
389 memcpy (s
+ length1
, string2
, length2
);
398 rval
= re_search_stub (bufp
, str
, len
, start
, range
, stop
, regs
, ret_len
);
403 /* The parameters have the same meaning as those of re_search.
404 Additional parameters:
405 If RET_LEN is nonzero the length of the match is returned (re_match style);
406 otherwise the position of the match is returned. */
409 re_search_stub (bufp
, string
, length
, start
, range
, stop
, regs
, ret_len
)
410 struct re_pattern_buffer
*bufp
;
412 int length
, start
, range
, stop
, ret_len
;
413 struct re_registers
*regs
;
415 reg_errcode_t result
;
419 re_dfa_t
*dfa
= (re_dfa_t
*) bufp
->buffer
;
421 /* Check for out-of-range. */
422 if (BE (start
< 0 || start
> length
, 0))
424 if (BE (start
+ range
> length
, 0))
425 range
= length
- start
;
426 else if (BE (start
+ range
< 0, 0))
429 __libc_lock_lock (dfa
->lock
);
431 eflags
|= (bufp
->not_bol
) ? REG_NOTBOL
: 0;
432 eflags
|= (bufp
->not_eol
) ? REG_NOTEOL
: 0;
434 /* Compile fastmap if we haven't yet. */
435 if (range
> 0 && bufp
->fastmap
!= NULL
&& !bufp
->fastmap_accurate
)
436 re_compile_fastmap (bufp
);
438 if (BE (bufp
->no_sub
, 0))
441 /* We need at least 1 register. */
444 else if (BE (bufp
->regs_allocated
== REGS_FIXED
&&
445 regs
->num_regs
< bufp
->re_nsub
+ 1, 0))
447 nregs
= regs
->num_regs
;
448 if (BE (nregs
< 1, 0))
450 /* Nothing can be copied to regs. */
456 nregs
= bufp
->re_nsub
+ 1;
457 pmatch
= re_malloc (regmatch_t
, nregs
);
458 if (BE (pmatch
== NULL
, 0))
464 result
= re_search_internal (bufp
, string
, length
, start
, range
, stop
,
465 nregs
, pmatch
, eflags
);
469 /* I hope we needn't fill ther regs with -1's when no match was found. */
470 if (result
!= REG_NOERROR
)
472 else if (regs
!= NULL
)
474 /* If caller wants register contents data back, copy them. */
475 bufp
->regs_allocated
= re_copy_regs (regs
, pmatch
, nregs
,
476 bufp
->regs_allocated
);
477 if (BE (bufp
->regs_allocated
== REGS_UNALLOCATED
, 0))
481 if (BE (rval
== 0, 1))
485 assert (pmatch
[0].rm_so
== start
);
486 rval
= pmatch
[0].rm_eo
- start
;
489 rval
= pmatch
[0].rm_so
;
493 __libc_lock_unlock (dfa
->lock
);
498 re_copy_regs (regs
, pmatch
, nregs
, regs_allocated
)
499 struct re_registers
*regs
;
501 int nregs
, regs_allocated
;
503 int rval
= REGS_REALLOCATE
;
505 int need_regs
= nregs
+ 1;
506 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
509 /* Have the register data arrays been allocated? */
510 if (regs_allocated
== REGS_UNALLOCATED
)
511 { /* No. So allocate them with malloc. */
512 regs
->start
= re_malloc (regoff_t
, need_regs
);
513 if (BE (regs
->start
== NULL
, 0))
514 return REGS_UNALLOCATED
;
515 regs
->end
= re_malloc (regoff_t
, need_regs
);
516 if (BE (regs
->end
== NULL
, 0))
518 re_free (regs
->start
);
519 return REGS_UNALLOCATED
;
521 regs
->num_regs
= need_regs
;
523 else if (regs_allocated
== REGS_REALLOCATE
)
524 { /* Yes. If we need more elements than were already
525 allocated, reallocate them. If we need fewer, just
527 if (BE (need_regs
> regs
->num_regs
, 0))
529 regoff_t
*new_start
= re_realloc (regs
->start
, regoff_t
, need_regs
);
531 if (BE (new_start
== NULL
, 0))
532 return REGS_UNALLOCATED
;
533 new_end
= re_realloc (regs
->end
, regoff_t
, need_regs
);
534 if (BE (new_end
== NULL
, 0))
537 return REGS_UNALLOCATED
;
539 regs
->start
= new_start
;
541 regs
->num_regs
= need_regs
;
546 assert (regs_allocated
== REGS_FIXED
);
547 /* This function may not be called with REGS_FIXED and nregs too big. */
548 assert (regs
->num_regs
>= nregs
);
553 for (i
= 0; i
< nregs
; ++i
)
555 regs
->start
[i
] = pmatch
[i
].rm_so
;
556 regs
->end
[i
] = pmatch
[i
].rm_eo
;
558 for ( ; i
< regs
->num_regs
; ++i
)
559 regs
->start
[i
] = regs
->end
[i
] = -1;
564 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
565 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
566 this memory for recording register information. STARTS and ENDS
567 must be allocated using the malloc library routine, and must each
568 be at least NUM_REGS * sizeof (regoff_t) bytes long.
570 If NUM_REGS == 0, then subsequent matches should allocate their own
573 Unless this function is called, the first search or match using
574 PATTERN_BUFFER will allocate its own register data, without
575 freeing the old data. */
578 re_set_registers (bufp
, regs
, num_regs
, starts
, ends
)
579 struct re_pattern_buffer
*bufp
;
580 struct re_registers
*regs
;
582 regoff_t
*starts
, *ends
;
586 bufp
->regs_allocated
= REGS_REALLOCATE
;
587 regs
->num_regs
= num_regs
;
588 regs
->start
= starts
;
593 bufp
->regs_allocated
= REGS_UNALLOCATED
;
595 regs
->start
= regs
->end
= (regoff_t
*) 0;
599 weak_alias (__re_set_registers
, re_set_registers
)
602 /* Entry points compatible with 4.2 BSD regex library. We don't define
603 them unless specifically requested. */
605 #if defined _REGEX_RE_COMP || defined _LIBC
613 return 0 == regexec (&re_comp_buf
, s
, 0, NULL
, 0);
615 #endif /* _REGEX_RE_COMP */
617 /* Internal entry point. */
619 /* Searches for a compiled pattern PREG in the string STRING, whose
620 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
621 mingings with regexec. START, and RANGE have the same meanings
623 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
624 otherwise return the error code.
625 Note: We assume front end functions already check ranges.
626 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
629 __attribute_warn_unused_result__
630 re_search_internal (preg
, string
, length
, start
, range
, stop
, nmatch
, pmatch
,
634 int length
, start
, range
, stop
, eflags
;
639 const re_dfa_t
*dfa
= (const re_dfa_t
*) preg
->buffer
;
640 int left_lim
, right_lim
, incr
;
641 int fl_longest_match
, match_first
, match_kind
, match_last
= -1;
644 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
645 re_match_context_t mctx
= { .dfa
= dfa
};
647 re_match_context_t mctx
;
649 char *fastmap
= (preg
->fastmap
!= NULL
&& preg
->fastmap_accurate
650 && range
&& !preg
->can_be_null
) ? preg
->fastmap
: NULL
;
651 RE_TRANSLATE_TYPE t
= preg
->translate
;
653 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
654 memset (&mctx
, '\0', sizeof (re_match_context_t
));
658 extra_nmatch
= (nmatch
> preg
->re_nsub
) ? nmatch
- (preg
->re_nsub
+ 1) : 0;
659 nmatch
-= extra_nmatch
;
661 /* Check if the DFA haven't been compiled. */
662 if (BE (preg
->used
== 0 || dfa
->init_state
== NULL
663 || dfa
->init_state_word
== NULL
|| dfa
->init_state_nl
== NULL
664 || dfa
->init_state_begbuf
== NULL
, 0))
668 /* We assume front-end functions already check them. */
669 assert (start
+ range
>= 0 && start
+ range
<= length
);
672 /* If initial states with non-begbuf contexts have no elements,
673 the regex must be anchored. If preg->newline_anchor is set,
674 we'll never use init_state_nl, so do not check it. */
675 if (dfa
->init_state
->nodes
.nelem
== 0
676 && dfa
->init_state_word
->nodes
.nelem
== 0
677 && (dfa
->init_state_nl
->nodes
.nelem
== 0
678 || !preg
->newline_anchor
))
680 if (start
!= 0 && start
+ range
!= 0)
685 /* We must check the longest matching, if nmatch > 0. */
686 fl_longest_match
= (nmatch
!= 0 || dfa
->nbackref
);
688 err
= re_string_allocate (&mctx
.input
, string
, length
, dfa
->nodes_len
+ 1,
689 preg
->translate
, preg
->syntax
& RE_ICASE
, dfa
);
690 if (BE (err
!= REG_NOERROR
, 0))
692 mctx
.input
.stop
= stop
;
693 mctx
.input
.raw_stop
= stop
;
694 mctx
.input
.newline_anchor
= preg
->newline_anchor
;
696 err
= match_ctx_init (&mctx
, eflags
, dfa
->nbackref
* 2);
697 if (BE (err
!= REG_NOERROR
, 0))
700 /* We will log all the DFA states through which the dfa pass,
701 if nmatch > 1, or this dfa has "multibyte node", which is a
702 back-reference or a node which can accept multibyte character or
703 multi character collating element. */
704 if (nmatch
> 1 || dfa
->has_mb_node
)
706 /* Avoid overflow. */
707 if (BE (SIZE_MAX
/ sizeof (re_dfastate_t
*) <= mctx
.input
.bufs_len
, 0))
713 mctx
.state_log
= re_malloc (re_dfastate_t
*, mctx
.input
.bufs_len
+ 1);
714 if (BE (mctx
.state_log
== NULL
, 0))
721 mctx
.state_log
= NULL
;
724 mctx
.input
.tip_context
= (eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
725 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
;
727 /* Check incrementally whether of not the input string match. */
728 incr
= (range
< 0) ? -1 : 1;
729 left_lim
= (range
< 0) ? start
+ range
: start
;
730 right_lim
= (range
< 0) ? start
: start
+ range
;
731 sb
= dfa
->mb_cur_max
== 1;
734 ? ((sb
|| !(preg
->syntax
& RE_ICASE
|| t
) ? 4 : 0)
735 | (range
>= 0 ? 2 : 0)
736 | (t
!= NULL
? 1 : 0))
739 for (;; match_first
+= incr
)
742 if (match_first
< left_lim
|| right_lim
< match_first
)
745 /* Advance as rapidly as possible through the string, until we
746 find a plausible place to start matching. This may be done
747 with varying efficiency, so there are various possibilities:
748 only the most common of them are specialized, in order to
749 save on code size. We use a switch statement for speed. */
757 /* Fastmap with single-byte translation, match forward. */
758 while (BE (match_first
< right_lim
, 1)
759 && !fastmap
[t
[(unsigned char) string
[match_first
]]])
761 goto forward_match_found_start_or_reached_end
;
764 /* Fastmap without translation, match forward. */
765 while (BE (match_first
< right_lim
, 1)
766 && !fastmap
[(unsigned char) string
[match_first
]])
769 forward_match_found_start_or_reached_end
:
770 if (BE (match_first
== right_lim
, 0))
772 ch
= match_first
>= length
773 ? 0 : (unsigned char) string
[match_first
];
774 if (!fastmap
[t
? t
[ch
] : ch
])
781 /* Fastmap without multi-byte translation, match backwards. */
782 while (match_first
>= left_lim
)
784 ch
= match_first
>= length
785 ? 0 : (unsigned char) string
[match_first
];
786 if (fastmap
[t
? t
[ch
] : ch
])
790 if (match_first
< left_lim
)
795 /* In this case, we can't determine easily the current byte,
796 since it might be a component byte of a multibyte
797 character. Then we use the constructed buffer instead. */
800 /* If MATCH_FIRST is out of the valid range, reconstruct the
802 unsigned int offset
= match_first
- mctx
.input
.raw_mbs_idx
;
803 if (BE (offset
>= (unsigned int) mctx
.input
.valid_raw_len
, 0))
805 err
= re_string_reconstruct (&mctx
.input
, match_first
,
807 if (BE (err
!= REG_NOERROR
, 0))
810 offset
= match_first
- mctx
.input
.raw_mbs_idx
;
812 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
813 Note that MATCH_FIRST must not be smaller than 0. */
814 ch
= (match_first
>= length
815 ? 0 : re_string_byte_at (&mctx
.input
, offset
));
819 if (match_first
< left_lim
|| match_first
> right_lim
)
828 /* Reconstruct the buffers so that the matcher can assume that
829 the matching starts from the beginning of the buffer. */
830 err
= re_string_reconstruct (&mctx
.input
, match_first
, eflags
);
831 if (BE (err
!= REG_NOERROR
, 0))
834 #ifdef RE_ENABLE_I18N
835 /* Don't consider this char as a possible match start if it part,
836 yet isn't the head, of a multibyte character. */
837 if (!sb
&& !re_string_first_byte (&mctx
.input
, 0))
841 /* It seems to be appropriate one, then use the matcher. */
842 /* We assume that the matching starts from 0. */
843 mctx
.state_log_top
= mctx
.nbkref_ents
= mctx
.max_mb_elem_len
= 0;
844 match_last
= check_matching (&mctx
, fl_longest_match
,
845 range
>= 0 ? &match_first
: NULL
);
846 if (match_last
!= -1)
848 if (BE (match_last
== -2, 0))
855 mctx
.match_last
= match_last
;
856 if ((!preg
->no_sub
&& nmatch
> 1) || dfa
->nbackref
)
858 re_dfastate_t
*pstate
= mctx
.state_log
[match_last
];
859 mctx
.last_node
= check_halt_state_context (&mctx
, pstate
,
862 if ((!preg
->no_sub
&& nmatch
> 1 && dfa
->has_plural_match
)
865 err
= prune_impossible_nodes (&mctx
);
866 if (err
== REG_NOERROR
)
868 if (BE (err
!= REG_NOMATCH
, 0))
873 break; /* We found a match. */
877 match_ctx_clean (&mctx
);
881 assert (match_last
!= -1);
882 assert (err
== REG_NOERROR
);
885 /* Set pmatch[] if we need. */
890 /* Initialize registers. */
891 for (reg_idx
= 1; reg_idx
< nmatch
; ++reg_idx
)
892 pmatch
[reg_idx
].rm_so
= pmatch
[reg_idx
].rm_eo
= -1;
894 /* Set the points where matching start/end. */
896 pmatch
[0].rm_eo
= mctx
.match_last
;
898 if (!preg
->no_sub
&& nmatch
> 1)
900 err
= set_regs (preg
, &mctx
, nmatch
, pmatch
,
901 dfa
->has_plural_match
&& dfa
->nbackref
> 0);
902 if (BE (err
!= REG_NOERROR
, 0))
906 /* At last, add the offset to the each registers, since we slided
907 the buffers so that we could assume that the matching starts
909 for (reg_idx
= 0; reg_idx
< nmatch
; ++reg_idx
)
910 if (pmatch
[reg_idx
].rm_so
!= -1)
912 #ifdef RE_ENABLE_I18N
913 if (BE (mctx
.input
.offsets_needed
!= 0, 0))
915 pmatch
[reg_idx
].rm_so
=
916 (pmatch
[reg_idx
].rm_so
== mctx
.input
.valid_len
917 ? mctx
.input
.valid_raw_len
918 : mctx
.input
.offsets
[pmatch
[reg_idx
].rm_so
]);
919 pmatch
[reg_idx
].rm_eo
=
920 (pmatch
[reg_idx
].rm_eo
== mctx
.input
.valid_len
921 ? mctx
.input
.valid_raw_len
922 : mctx
.input
.offsets
[pmatch
[reg_idx
].rm_eo
]);
925 assert (mctx
.input
.offsets_needed
== 0);
927 pmatch
[reg_idx
].rm_so
+= match_first
;
928 pmatch
[reg_idx
].rm_eo
+= match_first
;
930 for (reg_idx
= 0; reg_idx
< extra_nmatch
; ++reg_idx
)
932 pmatch
[nmatch
+ reg_idx
].rm_so
= -1;
933 pmatch
[nmatch
+ reg_idx
].rm_eo
= -1;
937 for (reg_idx
= 0; reg_idx
+ 1 < nmatch
; reg_idx
++)
938 if (dfa
->subexp_map
[reg_idx
] != reg_idx
)
940 pmatch
[reg_idx
+ 1].rm_so
941 = pmatch
[dfa
->subexp_map
[reg_idx
] + 1].rm_so
;
942 pmatch
[reg_idx
+ 1].rm_eo
943 = pmatch
[dfa
->subexp_map
[reg_idx
] + 1].rm_eo
;
948 re_free (mctx
.state_log
);
950 match_ctx_free (&mctx
);
951 re_string_destruct (&mctx
.input
);
956 __attribute_warn_unused_result__
957 prune_impossible_nodes (mctx
)
958 re_match_context_t
*mctx
;
960 const re_dfa_t
*const dfa
= mctx
->dfa
;
961 int halt_node
, match_last
;
963 re_dfastate_t
**sifted_states
;
964 re_dfastate_t
**lim_states
= NULL
;
965 re_sift_context_t sctx
;
967 assert (mctx
->state_log
!= NULL
);
969 match_last
= mctx
->match_last
;
970 halt_node
= mctx
->last_node
;
972 /* Avoid overflow. */
973 if (BE (SIZE_MAX
/ sizeof (re_dfastate_t
*) <= match_last
, 0))
976 sifted_states
= re_malloc (re_dfastate_t
*, match_last
+ 1);
977 if (BE (sifted_states
== NULL
, 0))
984 lim_states
= re_malloc (re_dfastate_t
*, match_last
+ 1);
985 if (BE (lim_states
== NULL
, 0))
992 memset (lim_states
, '\0',
993 sizeof (re_dfastate_t
*) * (match_last
+ 1));
994 sift_ctx_init (&sctx
, sifted_states
, lim_states
, halt_node
,
996 ret
= sift_states_backward (mctx
, &sctx
);
997 re_node_set_free (&sctx
.limits
);
998 if (BE (ret
!= REG_NOERROR
, 0))
1000 if (sifted_states
[0] != NULL
|| lim_states
[0] != NULL
)
1010 } while (mctx
->state_log
[match_last
] == NULL
1011 || !mctx
->state_log
[match_last
]->halt
);
1012 halt_node
= check_halt_state_context (mctx
,
1013 mctx
->state_log
[match_last
],
1016 ret
= merge_state_array (dfa
, sifted_states
, lim_states
,
1018 re_free (lim_states
);
1020 if (BE (ret
!= REG_NOERROR
, 0))
1025 sift_ctx_init (&sctx
, sifted_states
, lim_states
, halt_node
, match_last
);
1026 ret
= sift_states_backward (mctx
, &sctx
);
1027 re_node_set_free (&sctx
.limits
);
1028 if (BE (ret
!= REG_NOERROR
, 0))
1030 if (sifted_states
[0] == NULL
)
1036 re_free (mctx
->state_log
);
1037 mctx
->state_log
= sifted_states
;
1038 sifted_states
= NULL
;
1039 mctx
->last_node
= halt_node
;
1040 mctx
->match_last
= match_last
;
1043 re_free (sifted_states
);
1044 re_free (lim_states
);
1048 /* Acquire an initial state and return it.
1049 We must select appropriate initial state depending on the context,
1050 since initial states may have constraints like "\<", "^", etc.. */
1052 static inline re_dfastate_t
*
1053 __attribute ((always_inline
)) internal_function
1054 acquire_init_state_context (reg_errcode_t
*err
, const re_match_context_t
*mctx
,
1057 const re_dfa_t
*const dfa
= mctx
->dfa
;
1058 if (dfa
->init_state
->has_constraint
)
1060 unsigned int context
;
1061 context
= re_string_context_at (&mctx
->input
, idx
- 1, mctx
->eflags
);
1062 if (IS_WORD_CONTEXT (context
))
1063 return dfa
->init_state_word
;
1064 else if (IS_ORDINARY_CONTEXT (context
))
1065 return dfa
->init_state
;
1066 else if (IS_BEGBUF_CONTEXT (context
) && IS_NEWLINE_CONTEXT (context
))
1067 return dfa
->init_state_begbuf
;
1068 else if (IS_NEWLINE_CONTEXT (context
))
1069 return dfa
->init_state_nl
;
1070 else if (IS_BEGBUF_CONTEXT (context
))
1072 /* It is relatively rare case, then calculate on demand. */
1073 return re_acquire_state_context (err
, dfa
,
1074 dfa
->init_state
->entrance_nodes
,
1078 /* Must not happen? */
1079 return dfa
->init_state
;
1082 return dfa
->init_state
;
1085 /* Check whether the regular expression match input string INPUT or not,
1086 and return the index where the matching end, return -1 if not match,
1087 or return -2 in case of an error.
1088 FL_LONGEST_MATCH means we want the POSIX longest matching.
1089 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1090 next place where we may want to try matching.
1091 Note that the matcher assume that the maching starts from the current
1092 index of the buffer. */
1095 internal_function __attribute_warn_unused_result__
1096 check_matching (re_match_context_t
*mctx
, int fl_longest_match
,
1099 const re_dfa_t
*const dfa
= mctx
->dfa
;
1102 int match_last
= -1;
1103 int cur_str_idx
= re_string_cur_idx (&mctx
->input
);
1104 re_dfastate_t
*cur_state
;
1105 int at_init_state
= p_match_first
!= NULL
;
1106 int next_start_idx
= cur_str_idx
;
1109 cur_state
= acquire_init_state_context (&err
, mctx
, cur_str_idx
);
1110 /* An initial state must not be NULL (invalid). */
1111 if (BE (cur_state
== NULL
, 0))
1113 assert (err
== REG_ESPACE
);
1117 if (mctx
->state_log
!= NULL
)
1119 mctx
->state_log
[cur_str_idx
] = cur_state
;
1121 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1122 later. E.g. Processing back references. */
1123 if (BE (dfa
->nbackref
, 0))
1126 err
= check_subexp_matching_top (mctx
, &cur_state
->nodes
, 0);
1127 if (BE (err
!= REG_NOERROR
, 0))
1130 if (cur_state
->has_backref
)
1132 err
= transit_state_bkref (mctx
, &cur_state
->nodes
);
1133 if (BE (err
!= REG_NOERROR
, 0))
1139 /* If the RE accepts NULL string. */
1140 if (BE (cur_state
->halt
, 0))
1142 if (!cur_state
->has_constraint
1143 || check_halt_state_context (mctx
, cur_state
, cur_str_idx
))
1145 if (!fl_longest_match
)
1149 match_last
= cur_str_idx
;
1155 while (!re_string_eoi (&mctx
->input
))
1157 re_dfastate_t
*old_state
= cur_state
;
1158 int next_char_idx
= re_string_cur_idx (&mctx
->input
) + 1;
1160 if ((BE (next_char_idx
>= mctx
->input
.bufs_len
, 0)
1161 && mctx
->input
.bufs_len
< mctx
->input
.len
)
1162 || (BE (next_char_idx
>= mctx
->input
.valid_len
, 0)
1163 && mctx
->input
.valid_len
< mctx
->input
.len
))
1165 err
= extend_buffers (mctx
, next_char_idx
+ 1);
1166 if (BE (err
!= REG_NOERROR
, 0))
1168 assert (err
== REG_ESPACE
);
1173 cur_state
= transit_state (&err
, mctx
, cur_state
);
1174 if (mctx
->state_log
!= NULL
)
1175 cur_state
= merge_state_with_log (&err
, mctx
, cur_state
);
1177 if (cur_state
== NULL
)
1179 /* Reached the invalid state or an error. Try to recover a valid
1180 state using the state log, if available and if we have not
1181 already found a valid (even if not the longest) match. */
1182 if (BE (err
!= REG_NOERROR
, 0))
1185 if (mctx
->state_log
== NULL
1186 || (match
&& !fl_longest_match
)
1187 || (cur_state
= find_recover_state (&err
, mctx
)) == NULL
)
1191 if (BE (at_init_state
, 0))
1193 if (old_state
== cur_state
)
1194 next_start_idx
= next_char_idx
;
1199 if (cur_state
->halt
)
1201 /* Reached a halt state.
1202 Check the halt state can satisfy the current context. */
1203 if (!cur_state
->has_constraint
1204 || check_halt_state_context (mctx
, cur_state
,
1205 re_string_cur_idx (&mctx
->input
)))
1207 /* We found an appropriate halt state. */
1208 match_last
= re_string_cur_idx (&mctx
->input
);
1211 /* We found a match, do not modify match_first below. */
1212 p_match_first
= NULL
;
1213 if (!fl_longest_match
)
1220 *p_match_first
+= next_start_idx
;
1225 /* Check NODE match the current context. */
1229 check_halt_node_context (const re_dfa_t
*dfa
, int node
, unsigned int context
)
1231 re_token_type_t type
= dfa
->nodes
[node
].type
;
1232 unsigned int constraint
= dfa
->nodes
[node
].constraint
;
1233 if (type
!= END_OF_RE
)
1237 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint
, context
))
1242 /* Check the halt state STATE match the current context.
1243 Return 0 if not match, if the node, STATE has, is a halt node and
1244 match the context, return the node. */
1248 check_halt_state_context (const re_match_context_t
*mctx
,
1249 const re_dfastate_t
*state
, int idx
)
1252 unsigned int context
;
1254 assert (state
->halt
);
1256 context
= re_string_context_at (&mctx
->input
, idx
, mctx
->eflags
);
1257 for (i
= 0; i
< state
->nodes
.nelem
; ++i
)
1258 if (check_halt_node_context (mctx
->dfa
, state
->nodes
.elems
[i
], context
))
1259 return state
->nodes
.elems
[i
];
1263 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1264 corresponding to the DFA).
1265 Return the destination node, and update EPS_VIA_NODES, return -1 in case
1270 proceed_next_node (const re_match_context_t
*mctx
, int nregs
, regmatch_t
*regs
,
1271 int *pidx
, int node
, re_node_set
*eps_via_nodes
,
1272 struct re_fail_stack_t
*fs
)
1274 const re_dfa_t
*const dfa
= mctx
->dfa
;
1276 if (IS_EPSILON_NODE (dfa
->nodes
[node
].type
))
1278 re_node_set
*cur_nodes
= &mctx
->state_log
[*pidx
]->nodes
;
1279 re_node_set
*edests
= &dfa
->edests
[node
];
1281 err
= re_node_set_insert (eps_via_nodes
, node
);
1282 if (BE (err
< 0, 0))
1284 /* Pick up a valid destination, or return -1 if none is found. */
1285 for (dest_node
= -1, i
= 0; i
< edests
->nelem
; ++i
)
1287 int candidate
= edests
->elems
[i
];
1288 if (!re_node_set_contains (cur_nodes
, candidate
))
1290 if (dest_node
== -1)
1291 dest_node
= candidate
;
1295 /* In order to avoid infinite loop like "(a*)*", return the second
1296 epsilon-transition if the first was already considered. */
1297 if (re_node_set_contains (eps_via_nodes
, dest_node
))
1300 /* Otherwise, push the second epsilon-transition on the fail stack. */
1302 && push_fail_stack (fs
, *pidx
, candidate
, nregs
, regs
,
1306 /* We know we are going to exit. */
1315 re_token_type_t type
= dfa
->nodes
[node
].type
;
1317 #ifdef RE_ENABLE_I18N
1318 if (dfa
->nodes
[node
].accept_mb
)
1319 naccepted
= check_node_accept_bytes (dfa
, node
, &mctx
->input
, *pidx
);
1321 #endif /* RE_ENABLE_I18N */
1322 if (type
== OP_BACK_REF
)
1324 int subexp_idx
= dfa
->nodes
[node
].opr
.idx
+ 1;
1325 naccepted
= regs
[subexp_idx
].rm_eo
- regs
[subexp_idx
].rm_so
;
1328 if (regs
[subexp_idx
].rm_so
== -1 || regs
[subexp_idx
].rm_eo
== -1)
1332 char *buf
= (char *) re_string_get_buffer (&mctx
->input
);
1333 if (memcmp (buf
+ regs
[subexp_idx
].rm_so
, buf
+ *pidx
,
1342 err
= re_node_set_insert (eps_via_nodes
, node
);
1343 if (BE (err
< 0, 0))
1345 dest_node
= dfa
->edests
[node
].elems
[0];
1346 if (re_node_set_contains (&mctx
->state_log
[*pidx
]->nodes
,
1353 || check_node_accept (mctx
, dfa
->nodes
+ node
, *pidx
))
1355 int dest_node
= dfa
->nexts
[node
];
1356 *pidx
= (naccepted
== 0) ? *pidx
+ 1 : *pidx
+ naccepted
;
1357 if (fs
&& (*pidx
> mctx
->match_last
|| mctx
->state_log
[*pidx
] == NULL
1358 || !re_node_set_contains (&mctx
->state_log
[*pidx
]->nodes
,
1361 re_node_set_empty (eps_via_nodes
);
1368 static reg_errcode_t
1369 internal_function __attribute_warn_unused_result__
1370 push_fail_stack (struct re_fail_stack_t
*fs
, int str_idx
, int dest_node
,
1371 int nregs
, regmatch_t
*regs
, re_node_set
*eps_via_nodes
)
1374 int num
= fs
->num
++;
1375 if (fs
->num
== fs
->alloc
)
1377 struct re_fail_stack_ent_t
*new_array
;
1378 new_array
= realloc (fs
->stack
, (sizeof (struct re_fail_stack_ent_t
)
1380 if (new_array
== NULL
)
1383 fs
->stack
= new_array
;
1385 fs
->stack
[num
].idx
= str_idx
;
1386 fs
->stack
[num
].node
= dest_node
;
1387 fs
->stack
[num
].regs
= re_malloc (regmatch_t
, nregs
);
1388 if (fs
->stack
[num
].regs
== NULL
)
1390 memcpy (fs
->stack
[num
].regs
, regs
, sizeof (regmatch_t
) * nregs
);
1391 err
= re_node_set_init_copy (&fs
->stack
[num
].eps_via_nodes
, eps_via_nodes
);
1397 pop_fail_stack (struct re_fail_stack_t
*fs
, int *pidx
, int nregs
,
1398 regmatch_t
*regs
, re_node_set
*eps_via_nodes
)
1400 int num
= --fs
->num
;
1402 *pidx
= fs
->stack
[num
].idx
;
1403 memcpy (regs
, fs
->stack
[num
].regs
, sizeof (regmatch_t
) * nregs
);
1404 re_node_set_free (eps_via_nodes
);
1405 re_free (fs
->stack
[num
].regs
);
1406 *eps_via_nodes
= fs
->stack
[num
].eps_via_nodes
;
1407 return fs
->stack
[num
].node
;
1410 /* Set the positions where the subexpressions are starts/ends to registers
1412 Note: We assume that pmatch[0] is already set, and
1413 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1415 static reg_errcode_t
1416 internal_function __attribute_warn_unused_result__
1417 set_regs (const regex_t
*preg
, const re_match_context_t
*mctx
, size_t nmatch
,
1418 regmatch_t
*pmatch
, int fl_backtrack
)
1420 const re_dfa_t
*dfa
= (const re_dfa_t
*) preg
->buffer
;
1422 re_node_set eps_via_nodes
;
1423 struct re_fail_stack_t
*fs
;
1424 struct re_fail_stack_t fs_body
= { 0, 2, NULL
};
1425 regmatch_t
*prev_idx_match
;
1426 int prev_idx_match_malloced
= 0;
1429 assert (nmatch
> 1);
1430 assert (mctx
->state_log
!= NULL
);
1435 fs
->stack
= re_malloc (struct re_fail_stack_ent_t
, fs
->alloc
);
1436 if (fs
->stack
== NULL
)
1442 cur_node
= dfa
->init_node
;
1443 re_node_set_init_empty (&eps_via_nodes
);
1445 if (__libc_use_alloca (nmatch
* sizeof (regmatch_t
)))
1446 prev_idx_match
= (regmatch_t
*) alloca (nmatch
* sizeof (regmatch_t
));
1449 prev_idx_match
= re_malloc (regmatch_t
, nmatch
);
1450 if (prev_idx_match
== NULL
)
1452 free_fail_stack_return (fs
);
1455 prev_idx_match_malloced
= 1;
1457 memcpy (prev_idx_match
, pmatch
, sizeof (regmatch_t
) * nmatch
);
1459 for (idx
= pmatch
[0].rm_so
; idx
<= pmatch
[0].rm_eo
;)
1461 update_regs (dfa
, pmatch
, prev_idx_match
, cur_node
, idx
, nmatch
);
1463 if (idx
== pmatch
[0].rm_eo
&& cur_node
== mctx
->last_node
)
1468 for (reg_idx
= 0; reg_idx
< nmatch
; ++reg_idx
)
1469 if (pmatch
[reg_idx
].rm_so
> -1 && pmatch
[reg_idx
].rm_eo
== -1)
1471 if (reg_idx
== nmatch
)
1473 re_node_set_free (&eps_via_nodes
);
1474 if (prev_idx_match_malloced
)
1475 re_free (prev_idx_match
);
1476 return free_fail_stack_return (fs
);
1478 cur_node
= pop_fail_stack (fs
, &idx
, nmatch
, pmatch
,
1483 re_node_set_free (&eps_via_nodes
);
1484 if (prev_idx_match_malloced
)
1485 re_free (prev_idx_match
);
1490 /* Proceed to next node. */
1491 cur_node
= proceed_next_node (mctx
, nmatch
, pmatch
, &idx
, cur_node
,
1492 &eps_via_nodes
, fs
);
1494 if (BE (cur_node
< 0, 0))
1496 if (BE (cur_node
== -2, 0))
1498 re_node_set_free (&eps_via_nodes
);
1499 if (prev_idx_match_malloced
)
1500 re_free (prev_idx_match
);
1501 free_fail_stack_return (fs
);
1505 cur_node
= pop_fail_stack (fs
, &idx
, nmatch
, pmatch
,
1509 re_node_set_free (&eps_via_nodes
);
1510 if (prev_idx_match_malloced
)
1511 re_free (prev_idx_match
);
1516 re_node_set_free (&eps_via_nodes
);
1517 if (prev_idx_match_malloced
)
1518 re_free (prev_idx_match
);
1519 return free_fail_stack_return (fs
);
1522 static reg_errcode_t
1524 free_fail_stack_return (struct re_fail_stack_t
*fs
)
1529 for (fs_idx
= 0; fs_idx
< fs
->num
; ++fs_idx
)
1531 re_node_set_free (&fs
->stack
[fs_idx
].eps_via_nodes
);
1532 re_free (fs
->stack
[fs_idx
].regs
);
1534 re_free (fs
->stack
);
1541 update_regs (const re_dfa_t
*dfa
, regmatch_t
*pmatch
,
1542 regmatch_t
*prev_idx_match
, int cur_node
, int cur_idx
, int nmatch
)
1544 int type
= dfa
->nodes
[cur_node
].type
;
1545 if (type
== OP_OPEN_SUBEXP
)
1547 int reg_num
= dfa
->nodes
[cur_node
].opr
.idx
+ 1;
1549 /* We are at the first node of this sub expression. */
1550 if (reg_num
< nmatch
)
1552 pmatch
[reg_num
].rm_so
= cur_idx
;
1553 pmatch
[reg_num
].rm_eo
= -1;
1556 else if (type
== OP_CLOSE_SUBEXP
)
1558 int reg_num
= dfa
->nodes
[cur_node
].opr
.idx
+ 1;
1559 if (reg_num
< nmatch
)
1561 /* We are at the last node of this sub expression. */
1562 if (pmatch
[reg_num
].rm_so
< cur_idx
)
1564 pmatch
[reg_num
].rm_eo
= cur_idx
;
1565 /* This is a non-empty match or we are not inside an optional
1566 subexpression. Accept this right away. */
1567 memcpy (prev_idx_match
, pmatch
, sizeof (regmatch_t
) * nmatch
);
1571 if (dfa
->nodes
[cur_node
].opt_subexp
1572 && prev_idx_match
[reg_num
].rm_so
!= -1)
1573 /* We transited through an empty match for an optional
1574 subexpression, like (a?)*, and this is not the subexp's
1575 first match. Copy back the old content of the registers
1576 so that matches of an inner subexpression are undone as
1577 well, like in ((a?))*. */
1578 memcpy (pmatch
, prev_idx_match
, sizeof (regmatch_t
) * nmatch
);
1580 /* We completed a subexpression, but it may be part of
1581 an optional one, so do not update PREV_IDX_MATCH. */
1582 pmatch
[reg_num
].rm_eo
= cur_idx
;
1588 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1589 and sift the nodes in each states according to the following rules.
1590 Updated state_log will be wrote to STATE_LOG.
1592 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1593 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1594 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1595 the LAST_NODE, we throw away the node `a'.
1596 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1597 string `s' and transit to `b':
1598 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1600 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1601 thrown away, we throw away the node `a'.
1602 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1603 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1605 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1606 we throw away the node `a'. */
1608 #define STATE_NODE_CONTAINS(state,node) \
1609 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1611 static reg_errcode_t
1613 sift_states_backward (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
)
1617 int str_idx
= sctx
->last_str_idx
;
1618 re_node_set cur_dest
;
1621 assert (mctx
->state_log
!= NULL
&& mctx
->state_log
[str_idx
] != NULL
);
1624 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1625 transit to the last_node and the last_node itself. */
1626 err
= re_node_set_init_1 (&cur_dest
, sctx
->last_node
);
1627 if (BE (err
!= REG_NOERROR
, 0))
1629 err
= update_cur_sifted_state (mctx
, sctx
, str_idx
, &cur_dest
);
1630 if (BE (err
!= REG_NOERROR
, 0))
1633 /* Then check each states in the state_log. */
1636 /* Update counters. */
1637 null_cnt
= (sctx
->sifted_states
[str_idx
] == NULL
) ? null_cnt
+ 1 : 0;
1638 if (null_cnt
> mctx
->max_mb_elem_len
)
1640 memset (sctx
->sifted_states
, '\0',
1641 sizeof (re_dfastate_t
*) * str_idx
);
1642 re_node_set_free (&cur_dest
);
1645 re_node_set_empty (&cur_dest
);
1648 if (mctx
->state_log
[str_idx
])
1650 err
= build_sifted_states (mctx
, sctx
, str_idx
, &cur_dest
);
1651 if (BE (err
!= REG_NOERROR
, 0))
1655 /* Add all the nodes which satisfy the following conditions:
1656 - It can epsilon transit to a node in CUR_DEST.
1658 And update state_log. */
1659 err
= update_cur_sifted_state (mctx
, sctx
, str_idx
, &cur_dest
);
1660 if (BE (err
!= REG_NOERROR
, 0))
1665 re_node_set_free (&cur_dest
);
1669 static reg_errcode_t
1670 internal_function __attribute_warn_unused_result__
1671 build_sifted_states (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
1672 int str_idx
, re_node_set
*cur_dest
)
1674 const re_dfa_t
*const dfa
= mctx
->dfa
;
1675 const re_node_set
*cur_src
= &mctx
->state_log
[str_idx
]->non_eps_nodes
;
1678 /* Then build the next sifted state.
1679 We build the next sifted state on `cur_dest', and update
1680 `sifted_states[str_idx]' with `cur_dest'.
1682 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1683 `cur_src' points the node_set of the old `state_log[str_idx]'
1684 (with the epsilon nodes pre-filtered out). */
1685 for (i
= 0; i
< cur_src
->nelem
; i
++)
1687 int prev_node
= cur_src
->elems
[i
];
1692 re_token_type_t type
= dfa
->nodes
[prev_node
].type
;
1693 assert (!IS_EPSILON_NODE (type
));
1695 #ifdef RE_ENABLE_I18N
1696 /* If the node may accept `multi byte'. */
1697 if (dfa
->nodes
[prev_node
].accept_mb
)
1698 naccepted
= sift_states_iter_mb (mctx
, sctx
, prev_node
,
1699 str_idx
, sctx
->last_str_idx
);
1700 #endif /* RE_ENABLE_I18N */
1702 /* We don't check backreferences here.
1703 See update_cur_sifted_state(). */
1705 && check_node_accept (mctx
, dfa
->nodes
+ prev_node
, str_idx
)
1706 && STATE_NODE_CONTAINS (sctx
->sifted_states
[str_idx
+ 1],
1707 dfa
->nexts
[prev_node
]))
1713 if (sctx
->limits
.nelem
)
1715 int to_idx
= str_idx
+ naccepted
;
1716 if (check_dst_limits (mctx
, &sctx
->limits
,
1717 dfa
->nexts
[prev_node
], to_idx
,
1718 prev_node
, str_idx
))
1721 ret
= re_node_set_insert (cur_dest
, prev_node
);
1722 if (BE (ret
== -1, 0))
1729 /* Helper functions. */
1731 static reg_errcode_t
1733 clean_state_log_if_needed (re_match_context_t
*mctx
, int next_state_log_idx
)
1735 int top
= mctx
->state_log_top
;
1737 if ((next_state_log_idx
>= mctx
->input
.bufs_len
1738 && mctx
->input
.bufs_len
< mctx
->input
.len
)
1739 || (next_state_log_idx
>= mctx
->input
.valid_len
1740 && mctx
->input
.valid_len
< mctx
->input
.len
))
1743 err
= extend_buffers (mctx
, next_state_log_idx
+ 1);
1744 if (BE (err
!= REG_NOERROR
, 0))
1748 if (top
< next_state_log_idx
)
1750 memset (mctx
->state_log
+ top
+ 1, '\0',
1751 sizeof (re_dfastate_t
*) * (next_state_log_idx
- top
));
1752 mctx
->state_log_top
= next_state_log_idx
;
1757 static reg_errcode_t
1759 merge_state_array (const re_dfa_t
*dfa
, re_dfastate_t
**dst
,
1760 re_dfastate_t
**src
, int num
)
1764 for (st_idx
= 0; st_idx
< num
; ++st_idx
)
1766 if (dst
[st_idx
] == NULL
)
1767 dst
[st_idx
] = src
[st_idx
];
1768 else if (src
[st_idx
] != NULL
)
1770 re_node_set merged_set
;
1771 err
= re_node_set_init_union (&merged_set
, &dst
[st_idx
]->nodes
,
1772 &src
[st_idx
]->nodes
);
1773 if (BE (err
!= REG_NOERROR
, 0))
1775 dst
[st_idx
] = re_acquire_state (&err
, dfa
, &merged_set
);
1776 re_node_set_free (&merged_set
);
1777 if (BE (err
!= REG_NOERROR
, 0))
1784 static reg_errcode_t
1786 update_cur_sifted_state (const re_match_context_t
*mctx
,
1787 re_sift_context_t
*sctx
, int str_idx
,
1788 re_node_set
*dest_nodes
)
1790 const re_dfa_t
*const dfa
= mctx
->dfa
;
1791 reg_errcode_t err
= REG_NOERROR
;
1792 const re_node_set
*candidates
;
1793 candidates
= ((mctx
->state_log
[str_idx
] == NULL
) ? NULL
1794 : &mctx
->state_log
[str_idx
]->nodes
);
1796 if (dest_nodes
->nelem
== 0)
1797 sctx
->sifted_states
[str_idx
] = NULL
;
1802 /* At first, add the nodes which can epsilon transit to a node in
1804 err
= add_epsilon_src_nodes (dfa
, dest_nodes
, candidates
);
1805 if (BE (err
!= REG_NOERROR
, 0))
1808 /* Then, check the limitations in the current sift_context. */
1809 if (sctx
->limits
.nelem
)
1811 err
= check_subexp_limits (dfa
, dest_nodes
, candidates
, &sctx
->limits
,
1812 mctx
->bkref_ents
, str_idx
);
1813 if (BE (err
!= REG_NOERROR
, 0))
1818 sctx
->sifted_states
[str_idx
] = re_acquire_state (&err
, dfa
, dest_nodes
);
1819 if (BE (err
!= REG_NOERROR
, 0))
1823 if (candidates
&& mctx
->state_log
[str_idx
]->has_backref
)
1825 err
= sift_states_bkref (mctx
, sctx
, str_idx
, candidates
);
1826 if (BE (err
!= REG_NOERROR
, 0))
1832 static reg_errcode_t
1833 internal_function __attribute_warn_unused_result__
1834 add_epsilon_src_nodes (const re_dfa_t
*dfa
, re_node_set
*dest_nodes
,
1835 const re_node_set
*candidates
)
1837 reg_errcode_t err
= REG_NOERROR
;
1840 re_dfastate_t
*state
= re_acquire_state (&err
, dfa
, dest_nodes
);
1841 if (BE (err
!= REG_NOERROR
, 0))
1844 if (!state
->inveclosure
.alloc
)
1846 err
= re_node_set_alloc (&state
->inveclosure
, dest_nodes
->nelem
);
1847 if (BE (err
!= REG_NOERROR
, 0))
1849 for (i
= 0; i
< dest_nodes
->nelem
; i
++)
1851 err
= re_node_set_merge (&state
->inveclosure
,
1852 dfa
->inveclosures
+ dest_nodes
->elems
[i
]);
1853 if (BE (err
!= REG_NOERROR
, 0))
1857 return re_node_set_add_intersect (dest_nodes
, candidates
,
1858 &state
->inveclosure
);
1861 static reg_errcode_t
1863 sub_epsilon_src_nodes (const re_dfa_t
*dfa
, int node
, re_node_set
*dest_nodes
,
1864 const re_node_set
*candidates
)
1868 re_node_set
*inv_eclosure
= dfa
->inveclosures
+ node
;
1869 re_node_set except_nodes
;
1870 re_node_set_init_empty (&except_nodes
);
1871 for (ecl_idx
= 0; ecl_idx
< inv_eclosure
->nelem
; ++ecl_idx
)
1873 int cur_node
= inv_eclosure
->elems
[ecl_idx
];
1874 if (cur_node
== node
)
1876 if (IS_EPSILON_NODE (dfa
->nodes
[cur_node
].type
))
1878 int edst1
= dfa
->edests
[cur_node
].elems
[0];
1879 int edst2
= ((dfa
->edests
[cur_node
].nelem
> 1)
1880 ? dfa
->edests
[cur_node
].elems
[1] : -1);
1881 if ((!re_node_set_contains (inv_eclosure
, edst1
)
1882 && re_node_set_contains (dest_nodes
, edst1
))
1884 && !re_node_set_contains (inv_eclosure
, edst2
)
1885 && re_node_set_contains (dest_nodes
, edst2
)))
1887 err
= re_node_set_add_intersect (&except_nodes
, candidates
,
1888 dfa
->inveclosures
+ cur_node
);
1889 if (BE (err
!= REG_NOERROR
, 0))
1891 re_node_set_free (&except_nodes
);
1897 for (ecl_idx
= 0; ecl_idx
< inv_eclosure
->nelem
; ++ecl_idx
)
1899 int cur_node
= inv_eclosure
->elems
[ecl_idx
];
1900 if (!re_node_set_contains (&except_nodes
, cur_node
))
1902 int idx
= re_node_set_contains (dest_nodes
, cur_node
) - 1;
1903 re_node_set_remove_at (dest_nodes
, idx
);
1906 re_node_set_free (&except_nodes
);
1912 check_dst_limits (const re_match_context_t
*mctx
, re_node_set
*limits
,
1913 int dst_node
, int dst_idx
, int src_node
, int src_idx
)
1915 const re_dfa_t
*const dfa
= mctx
->dfa
;
1916 int lim_idx
, src_pos
, dst_pos
;
1918 int dst_bkref_idx
= search_cur_bkref_entry (mctx
, dst_idx
);
1919 int src_bkref_idx
= search_cur_bkref_entry (mctx
, src_idx
);
1920 for (lim_idx
= 0; lim_idx
< limits
->nelem
; ++lim_idx
)
1923 struct re_backref_cache_entry
*ent
;
1924 ent
= mctx
->bkref_ents
+ limits
->elems
[lim_idx
];
1925 subexp_idx
= dfa
->nodes
[ent
->node
].opr
.idx
;
1927 dst_pos
= check_dst_limits_calc_pos (mctx
, limits
->elems
[lim_idx
],
1928 subexp_idx
, dst_node
, dst_idx
,
1930 src_pos
= check_dst_limits_calc_pos (mctx
, limits
->elems
[lim_idx
],
1931 subexp_idx
, src_node
, src_idx
,
1935 <src> <dst> ( <subexp> )
1936 ( <subexp> ) <src> <dst>
1937 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1938 if (src_pos
== dst_pos
)
1939 continue; /* This is unrelated limitation. */
1948 check_dst_limits_calc_pos_1 (const re_match_context_t
*mctx
, int boundaries
,
1949 int subexp_idx
, int from_node
, int bkref_idx
)
1951 const re_dfa_t
*const dfa
= mctx
->dfa
;
1952 const re_node_set
*eclosures
= dfa
->eclosures
+ from_node
;
1955 /* Else, we are on the boundary: examine the nodes on the epsilon
1957 for (node_idx
= 0; node_idx
< eclosures
->nelem
; ++node_idx
)
1959 int node
= eclosures
->elems
[node_idx
];
1960 switch (dfa
->nodes
[node
].type
)
1963 if (bkref_idx
!= -1)
1965 struct re_backref_cache_entry
*ent
= mctx
->bkref_ents
+ bkref_idx
;
1970 if (ent
->node
!= node
)
1973 if (subexp_idx
< BITSET_WORD_BITS
1974 && !(ent
->eps_reachable_subexps_map
1975 & ((bitset_word_t
) 1 << subexp_idx
)))
1978 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1979 OP_CLOSE_SUBEXP cases below. But, if the
1980 destination node is the same node as the source
1981 node, don't recurse because it would cause an
1982 infinite loop: a regex that exhibits this behavior
1984 dst
= dfa
->edests
[node
].elems
[0];
1985 if (dst
== from_node
)
1989 else /* if (boundaries & 2) */
1994 check_dst_limits_calc_pos_1 (mctx
, boundaries
, subexp_idx
,
1996 if (cpos
== -1 /* && (boundaries & 1) */)
1998 if (cpos
== 0 && (boundaries
& 2))
2001 if (subexp_idx
< BITSET_WORD_BITS
)
2002 ent
->eps_reachable_subexps_map
2003 &= ~((bitset_word_t
) 1 << subexp_idx
);
2005 while (ent
++->more
);
2009 case OP_OPEN_SUBEXP
:
2010 if ((boundaries
& 1) && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2014 case OP_CLOSE_SUBEXP
:
2015 if ((boundaries
& 2) && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2024 return (boundaries
& 2) ? 1 : 0;
2029 check_dst_limits_calc_pos (const re_match_context_t
*mctx
, int limit
,
2030 int subexp_idx
, int from_node
, int str_idx
,
2033 struct re_backref_cache_entry
*lim
= mctx
->bkref_ents
+ limit
;
2036 /* If we are outside the range of the subexpression, return -1 or 1. */
2037 if (str_idx
< lim
->subexp_from
)
2040 if (lim
->subexp_to
< str_idx
)
2043 /* If we are within the subexpression, return 0. */
2044 boundaries
= (str_idx
== lim
->subexp_from
);
2045 boundaries
|= (str_idx
== lim
->subexp_to
) << 1;
2046 if (boundaries
== 0)
2049 /* Else, examine epsilon closure. */
2050 return check_dst_limits_calc_pos_1 (mctx
, boundaries
, subexp_idx
,
2051 from_node
, bkref_idx
);
2054 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2055 which are against limitations from DEST_NODES. */
2057 static reg_errcode_t
2059 check_subexp_limits (const re_dfa_t
*dfa
, re_node_set
*dest_nodes
,
2060 const re_node_set
*candidates
, re_node_set
*limits
,
2061 struct re_backref_cache_entry
*bkref_ents
, int str_idx
)
2064 int node_idx
, lim_idx
;
2066 for (lim_idx
= 0; lim_idx
< limits
->nelem
; ++lim_idx
)
2069 struct re_backref_cache_entry
*ent
;
2070 ent
= bkref_ents
+ limits
->elems
[lim_idx
];
2072 if (str_idx
<= ent
->subexp_from
|| ent
->str_idx
< str_idx
)
2073 continue; /* This is unrelated limitation. */
2075 subexp_idx
= dfa
->nodes
[ent
->node
].opr
.idx
;
2076 if (ent
->subexp_to
== str_idx
)
2080 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2082 int node
= dest_nodes
->elems
[node_idx
];
2083 re_token_type_t type
= dfa
->nodes
[node
].type
;
2084 if (type
== OP_OPEN_SUBEXP
2085 && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2087 else if (type
== OP_CLOSE_SUBEXP
2088 && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2092 /* Check the limitation of the open subexpression. */
2093 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2096 err
= sub_epsilon_src_nodes (dfa
, ops_node
, dest_nodes
,
2098 if (BE (err
!= REG_NOERROR
, 0))
2102 /* Check the limitation of the close subexpression. */
2104 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2106 int node
= dest_nodes
->elems
[node_idx
];
2107 if (!re_node_set_contains (dfa
->inveclosures
+ node
,
2109 && !re_node_set_contains (dfa
->eclosures
+ node
,
2112 /* It is against this limitation.
2113 Remove it form the current sifted state. */
2114 err
= sub_epsilon_src_nodes (dfa
, node
, dest_nodes
,
2116 if (BE (err
!= REG_NOERROR
, 0))
2122 else /* (ent->subexp_to != str_idx) */
2124 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2126 int node
= dest_nodes
->elems
[node_idx
];
2127 re_token_type_t type
= dfa
->nodes
[node
].type
;
2128 if (type
== OP_CLOSE_SUBEXP
|| type
== OP_OPEN_SUBEXP
)
2130 if (subexp_idx
!= dfa
->nodes
[node
].opr
.idx
)
2132 /* It is against this limitation.
2133 Remove it form the current sifted state. */
2134 err
= sub_epsilon_src_nodes (dfa
, node
, dest_nodes
,
2136 if (BE (err
!= REG_NOERROR
, 0))
2145 static reg_errcode_t
2146 internal_function __attribute_warn_unused_result__
2147 sift_states_bkref (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
2148 int str_idx
, const re_node_set
*candidates
)
2150 const re_dfa_t
*const dfa
= mctx
->dfa
;
2153 re_sift_context_t local_sctx
;
2154 int first_idx
= search_cur_bkref_entry (mctx
, str_idx
);
2156 if (first_idx
== -1)
2159 local_sctx
.sifted_states
= NULL
; /* Mark that it hasn't been initialized. */
2161 for (node_idx
= 0; node_idx
< candidates
->nelem
; ++node_idx
)
2164 re_token_type_t type
;
2165 struct re_backref_cache_entry
*entry
;
2166 node
= candidates
->elems
[node_idx
];
2167 type
= dfa
->nodes
[node
].type
;
2168 /* Avoid infinite loop for the REs like "()\1+". */
2169 if (node
== sctx
->last_node
&& str_idx
== sctx
->last_str_idx
)
2171 if (type
!= OP_BACK_REF
)
2174 entry
= mctx
->bkref_ents
+ first_idx
;
2175 enabled_idx
= first_idx
;
2182 re_dfastate_t
*cur_state
;
2184 if (entry
->node
!= node
)
2186 subexp_len
= entry
->subexp_to
- entry
->subexp_from
;
2187 to_idx
= str_idx
+ subexp_len
;
2188 dst_node
= (subexp_len
? dfa
->nexts
[node
]
2189 : dfa
->edests
[node
].elems
[0]);
2191 if (to_idx
> sctx
->last_str_idx
2192 || sctx
->sifted_states
[to_idx
] == NULL
2193 || !STATE_NODE_CONTAINS (sctx
->sifted_states
[to_idx
], dst_node
)
2194 || check_dst_limits (mctx
, &sctx
->limits
, node
,
2195 str_idx
, dst_node
, to_idx
))
2198 if (local_sctx
.sifted_states
== NULL
)
2201 err
= re_node_set_init_copy (&local_sctx
.limits
, &sctx
->limits
);
2202 if (BE (err
!= REG_NOERROR
, 0))
2205 local_sctx
.last_node
= node
;
2206 local_sctx
.last_str_idx
= str_idx
;
2207 ret
= re_node_set_insert (&local_sctx
.limits
, enabled_idx
);
2208 if (BE (ret
< 0, 0))
2213 cur_state
= local_sctx
.sifted_states
[str_idx
];
2214 err
= sift_states_backward (mctx
, &local_sctx
);
2215 if (BE (err
!= REG_NOERROR
, 0))
2217 if (sctx
->limited_states
!= NULL
)
2219 err
= merge_state_array (dfa
, sctx
->limited_states
,
2220 local_sctx
.sifted_states
,
2222 if (BE (err
!= REG_NOERROR
, 0))
2225 local_sctx
.sifted_states
[str_idx
] = cur_state
;
2226 re_node_set_remove (&local_sctx
.limits
, enabled_idx
);
2228 /* mctx->bkref_ents may have changed, reload the pointer. */
2229 entry
= mctx
->bkref_ents
+ enabled_idx
;
2231 while (enabled_idx
++, entry
++->more
);
2235 if (local_sctx
.sifted_states
!= NULL
)
2237 re_node_set_free (&local_sctx
.limits
);
2244 #ifdef RE_ENABLE_I18N
2247 sift_states_iter_mb (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
2248 int node_idx
, int str_idx
, int max_str_idx
)
2250 const re_dfa_t
*const dfa
= mctx
->dfa
;
2252 /* Check the node can accept `multi byte'. */
2253 naccepted
= check_node_accept_bytes (dfa
, node_idx
, &mctx
->input
, str_idx
);
2254 if (naccepted
> 0 && str_idx
+ naccepted
<= max_str_idx
&&
2255 !STATE_NODE_CONTAINS (sctx
->sifted_states
[str_idx
+ naccepted
],
2256 dfa
->nexts
[node_idx
]))
2257 /* The node can't accept the `multi byte', or the
2258 destination was already thrown away, then the node
2259 could't accept the current input `multi byte'. */
2261 /* Otherwise, it is sure that the node could accept
2262 `naccepted' bytes input. */
2265 #endif /* RE_ENABLE_I18N */
2268 /* Functions for state transition. */
2270 /* Return the next state to which the current state STATE will transit by
2271 accepting the current input byte, and update STATE_LOG if necessary.
2272 If STATE can accept a multibyte char/collating element/back reference
2273 update the destination of STATE_LOG. */
2275 static re_dfastate_t
*
2276 internal_function __attribute_warn_unused_result__
2277 transit_state (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2278 re_dfastate_t
*state
)
2280 re_dfastate_t
**trtable
;
2283 #ifdef RE_ENABLE_I18N
2284 /* If the current state can accept multibyte. */
2285 if (BE (state
->accept_mb
, 0))
2287 *err
= transit_state_mb (mctx
, state
);
2288 if (BE (*err
!= REG_NOERROR
, 0))
2291 #endif /* RE_ENABLE_I18N */
2293 /* Then decide the next state with the single byte. */
2296 /* don't use transition table */
2297 return transit_state_sb (err
, mctx
, state
);
2300 /* Use transition table */
2301 ch
= re_string_fetch_byte (&mctx
->input
);
2304 trtable
= state
->trtable
;
2305 if (BE (trtable
!= NULL
, 1))
2308 trtable
= state
->word_trtable
;
2309 if (BE (trtable
!= NULL
, 1))
2311 unsigned int context
;
2313 = re_string_context_at (&mctx
->input
,
2314 re_string_cur_idx (&mctx
->input
) - 1,
2316 if (IS_WORD_CONTEXT (context
))
2317 return trtable
[ch
+ SBC_MAX
];
2322 if (!build_trtable (mctx
->dfa
, state
))
2328 /* Retry, we now have a transition table. */
2332 /* Update the state_log if we need */
2335 merge_state_with_log (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2336 re_dfastate_t
*next_state
)
2338 const re_dfa_t
*const dfa
= mctx
->dfa
;
2339 int cur_idx
= re_string_cur_idx (&mctx
->input
);
2341 if (cur_idx
> mctx
->state_log_top
)
2343 mctx
->state_log
[cur_idx
] = next_state
;
2344 mctx
->state_log_top
= cur_idx
;
2346 else if (mctx
->state_log
[cur_idx
] == 0)
2348 mctx
->state_log
[cur_idx
] = next_state
;
2352 re_dfastate_t
*pstate
;
2353 unsigned int context
;
2354 re_node_set next_nodes
, *log_nodes
, *table_nodes
= NULL
;
2355 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2356 the destination of a multibyte char/collating element/
2357 back reference. Then the next state is the union set of
2358 these destinations and the results of the transition table. */
2359 pstate
= mctx
->state_log
[cur_idx
];
2360 log_nodes
= pstate
->entrance_nodes
;
2361 if (next_state
!= NULL
)
2363 table_nodes
= next_state
->entrance_nodes
;
2364 *err
= re_node_set_init_union (&next_nodes
, table_nodes
,
2366 if (BE (*err
!= REG_NOERROR
, 0))
2370 next_nodes
= *log_nodes
;
2371 /* Note: We already add the nodes of the initial state,
2372 then we don't need to add them here. */
2374 context
= re_string_context_at (&mctx
->input
,
2375 re_string_cur_idx (&mctx
->input
) - 1,
2377 next_state
= mctx
->state_log
[cur_idx
]
2378 = re_acquire_state_context (err
, dfa
, &next_nodes
, context
);
2379 /* We don't need to check errors here, since the return value of
2380 this function is next_state and ERR is already set. */
2382 if (table_nodes
!= NULL
)
2383 re_node_set_free (&next_nodes
);
2386 if (BE (dfa
->nbackref
, 0) && next_state
!= NULL
)
2388 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2389 later. We must check them here, since the back references in the
2390 next state might use them. */
2391 *err
= check_subexp_matching_top (mctx
, &next_state
->nodes
,
2393 if (BE (*err
!= REG_NOERROR
, 0))
2396 /* If the next state has back references. */
2397 if (next_state
->has_backref
)
2399 *err
= transit_state_bkref (mctx
, &next_state
->nodes
);
2400 if (BE (*err
!= REG_NOERROR
, 0))
2402 next_state
= mctx
->state_log
[cur_idx
];
2409 /* Skip bytes in the input that correspond to part of a
2410 multi-byte match, then look in the log for a state
2411 from which to restart matching. */
2414 find_recover_state (reg_errcode_t
*err
, re_match_context_t
*mctx
)
2416 re_dfastate_t
*cur_state
;
2419 int max
= mctx
->state_log_top
;
2420 int cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2424 if (++cur_str_idx
> max
)
2426 re_string_skip_bytes (&mctx
->input
, 1);
2428 while (mctx
->state_log
[cur_str_idx
] == NULL
);
2430 cur_state
= merge_state_with_log (err
, mctx
, NULL
);
2432 while (*err
== REG_NOERROR
&& cur_state
== NULL
);
2436 /* Helper functions for transit_state. */
2438 /* From the node set CUR_NODES, pick up the nodes whose types are
2439 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2440 expression. And register them to use them later for evaluating the
2441 correspoding back references. */
2443 static reg_errcode_t
2445 check_subexp_matching_top (re_match_context_t
*mctx
, re_node_set
*cur_nodes
,
2448 const re_dfa_t
*const dfa
= mctx
->dfa
;
2452 /* TODO: This isn't efficient.
2453 Because there might be more than one nodes whose types are
2454 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2457 for (node_idx
= 0; node_idx
< cur_nodes
->nelem
; ++node_idx
)
2459 int node
= cur_nodes
->elems
[node_idx
];
2460 if (dfa
->nodes
[node
].type
== OP_OPEN_SUBEXP
2461 && dfa
->nodes
[node
].opr
.idx
< BITSET_WORD_BITS
2462 && (dfa
->used_bkref_map
2463 & ((bitset_word_t
) 1 << dfa
->nodes
[node
].opr
.idx
)))
2465 err
= match_ctx_add_subtop (mctx
, node
, str_idx
);
2466 if (BE (err
!= REG_NOERROR
, 0))
2474 /* Return the next state to which the current state STATE will transit by
2475 accepting the current input byte. */
2477 static re_dfastate_t
*
2478 transit_state_sb (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2479 re_dfastate_t
*state
)
2481 const re_dfa_t
*const dfa
= mctx
->dfa
;
2482 re_node_set next_nodes
;
2483 re_dfastate_t
*next_state
;
2484 int node_cnt
, cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2485 unsigned int context
;
2487 *err
= re_node_set_alloc (&next_nodes
, state
->nodes
.nelem
+ 1);
2488 if (BE (*err
!= REG_NOERROR
, 0))
2490 for (node_cnt
= 0; node_cnt
< state
->nodes
.nelem
; ++node_cnt
)
2492 int cur_node
= state
->nodes
.elems
[node_cnt
];
2493 if (check_node_accept (mctx
, dfa
->nodes
+ cur_node
, cur_str_idx
))
2495 *err
= re_node_set_merge (&next_nodes
,
2496 dfa
->eclosures
+ dfa
->nexts
[cur_node
]);
2497 if (BE (*err
!= REG_NOERROR
, 0))
2499 re_node_set_free (&next_nodes
);
2504 context
= re_string_context_at (&mctx
->input
, cur_str_idx
, mctx
->eflags
);
2505 next_state
= re_acquire_state_context (err
, dfa
, &next_nodes
, context
);
2506 /* We don't need to check errors here, since the return value of
2507 this function is next_state and ERR is already set. */
2509 re_node_set_free (&next_nodes
);
2510 re_string_skip_bytes (&mctx
->input
, 1);
2515 #ifdef RE_ENABLE_I18N
2516 static reg_errcode_t
2518 transit_state_mb (re_match_context_t
*mctx
, re_dfastate_t
*pstate
)
2520 const re_dfa_t
*const dfa
= mctx
->dfa
;
2524 for (i
= 0; i
< pstate
->nodes
.nelem
; ++i
)
2526 re_node_set dest_nodes
, *new_nodes
;
2527 int cur_node_idx
= pstate
->nodes
.elems
[i
];
2528 int naccepted
, dest_idx
;
2529 unsigned int context
;
2530 re_dfastate_t
*dest_state
;
2532 if (!dfa
->nodes
[cur_node_idx
].accept_mb
)
2535 if (dfa
->nodes
[cur_node_idx
].constraint
)
2537 context
= re_string_context_at (&mctx
->input
,
2538 re_string_cur_idx (&mctx
->input
),
2540 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa
->nodes
[cur_node_idx
].constraint
,
2545 /* How many bytes the node can accept? */
2546 naccepted
= check_node_accept_bytes (dfa
, cur_node_idx
, &mctx
->input
,
2547 re_string_cur_idx (&mctx
->input
));
2551 /* The node can accepts `naccepted' bytes. */
2552 dest_idx
= re_string_cur_idx (&mctx
->input
) + naccepted
;
2553 mctx
->max_mb_elem_len
= ((mctx
->max_mb_elem_len
< naccepted
) ? naccepted
2554 : mctx
->max_mb_elem_len
);
2555 err
= clean_state_log_if_needed (mctx
, dest_idx
);
2556 if (BE (err
!= REG_NOERROR
, 0))
2559 assert (dfa
->nexts
[cur_node_idx
] != -1);
2561 new_nodes
= dfa
->eclosures
+ dfa
->nexts
[cur_node_idx
];
2563 dest_state
= mctx
->state_log
[dest_idx
];
2564 if (dest_state
== NULL
)
2565 dest_nodes
= *new_nodes
;
2568 err
= re_node_set_init_union (&dest_nodes
,
2569 dest_state
->entrance_nodes
, new_nodes
);
2570 if (BE (err
!= REG_NOERROR
, 0))
2573 context
= re_string_context_at (&mctx
->input
, dest_idx
- 1,
2575 mctx
->state_log
[dest_idx
]
2576 = re_acquire_state_context (&err
, dfa
, &dest_nodes
, context
);
2577 if (dest_state
!= NULL
)
2578 re_node_set_free (&dest_nodes
);
2579 if (BE (mctx
->state_log
[dest_idx
] == NULL
&& err
!= REG_NOERROR
, 0))
2584 #endif /* RE_ENABLE_I18N */
2586 static reg_errcode_t
2588 transit_state_bkref (re_match_context_t
*mctx
, const re_node_set
*nodes
)
2590 const re_dfa_t
*const dfa
= mctx
->dfa
;
2593 int cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2595 for (i
= 0; i
< nodes
->nelem
; ++i
)
2597 int dest_str_idx
, prev_nelem
, bkc_idx
;
2598 int node_idx
= nodes
->elems
[i
];
2599 unsigned int context
;
2600 const re_token_t
*node
= dfa
->nodes
+ node_idx
;
2601 re_node_set
*new_dest_nodes
;
2603 /* Check whether `node' is a backreference or not. */
2604 if (node
->type
!= OP_BACK_REF
)
2607 if (node
->constraint
)
2609 context
= re_string_context_at (&mctx
->input
, cur_str_idx
,
2611 if (NOT_SATISFY_NEXT_CONSTRAINT (node
->constraint
, context
))
2615 /* `node' is a backreference.
2616 Check the substring which the substring matched. */
2617 bkc_idx
= mctx
->nbkref_ents
;
2618 err
= get_subexp (mctx
, node_idx
, cur_str_idx
);
2619 if (BE (err
!= REG_NOERROR
, 0))
2622 /* And add the epsilon closures (which is `new_dest_nodes') of
2623 the backreference to appropriate state_log. */
2625 assert (dfa
->nexts
[node_idx
] != -1);
2627 for (; bkc_idx
< mctx
->nbkref_ents
; ++bkc_idx
)
2630 re_dfastate_t
*dest_state
;
2631 struct re_backref_cache_entry
*bkref_ent
;
2632 bkref_ent
= mctx
->bkref_ents
+ bkc_idx
;
2633 if (bkref_ent
->node
!= node_idx
|| bkref_ent
->str_idx
!= cur_str_idx
)
2635 subexp_len
= bkref_ent
->subexp_to
- bkref_ent
->subexp_from
;
2636 new_dest_nodes
= (subexp_len
== 0
2637 ? dfa
->eclosures
+ dfa
->edests
[node_idx
].elems
[0]
2638 : dfa
->eclosures
+ dfa
->nexts
[node_idx
]);
2639 dest_str_idx
= (cur_str_idx
+ bkref_ent
->subexp_to
2640 - bkref_ent
->subexp_from
);
2641 context
= re_string_context_at (&mctx
->input
, dest_str_idx
- 1,
2643 dest_state
= mctx
->state_log
[dest_str_idx
];
2644 prev_nelem
= ((mctx
->state_log
[cur_str_idx
] == NULL
) ? 0
2645 : mctx
->state_log
[cur_str_idx
]->nodes
.nelem
);
2646 /* Add `new_dest_node' to state_log. */
2647 if (dest_state
== NULL
)
2649 mctx
->state_log
[dest_str_idx
]
2650 = re_acquire_state_context (&err
, dfa
, new_dest_nodes
,
2652 if (BE (mctx
->state_log
[dest_str_idx
] == NULL
2653 && err
!= REG_NOERROR
, 0))
2658 re_node_set dest_nodes
;
2659 err
= re_node_set_init_union (&dest_nodes
,
2660 dest_state
->entrance_nodes
,
2662 if (BE (err
!= REG_NOERROR
, 0))
2664 re_node_set_free (&dest_nodes
);
2667 mctx
->state_log
[dest_str_idx
]
2668 = re_acquire_state_context (&err
, dfa
, &dest_nodes
, context
);
2669 re_node_set_free (&dest_nodes
);
2670 if (BE (mctx
->state_log
[dest_str_idx
] == NULL
2671 && err
!= REG_NOERROR
, 0))
2674 /* We need to check recursively if the backreference can epsilon
2677 && mctx
->state_log
[cur_str_idx
]->nodes
.nelem
> prev_nelem
)
2679 err
= check_subexp_matching_top (mctx
, new_dest_nodes
,
2681 if (BE (err
!= REG_NOERROR
, 0))
2683 err
= transit_state_bkref (mctx
, new_dest_nodes
);
2684 if (BE (err
!= REG_NOERROR
, 0))
2694 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2695 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2696 Note that we might collect inappropriate candidates here.
2697 However, the cost of checking them strictly here is too high, then we
2698 delay these checking for prune_impossible_nodes(). */
2700 static reg_errcode_t
2701 internal_function __attribute_warn_unused_result__
2702 get_subexp (re_match_context_t
*mctx
, int bkref_node
, int bkref_str_idx
)
2704 const re_dfa_t
*const dfa
= mctx
->dfa
;
2705 int subexp_num
, sub_top_idx
;
2706 const char *buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2707 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2708 int cache_idx
= search_cur_bkref_entry (mctx
, bkref_str_idx
);
2709 if (cache_idx
!= -1)
2711 const struct re_backref_cache_entry
*entry
2712 = mctx
->bkref_ents
+ cache_idx
;
2714 if (entry
->node
== bkref_node
)
2715 return REG_NOERROR
; /* We already checked it. */
2716 while (entry
++->more
);
2719 subexp_num
= dfa
->nodes
[bkref_node
].opr
.idx
;
2721 /* For each sub expression */
2722 for (sub_top_idx
= 0; sub_top_idx
< mctx
->nsub_tops
; ++sub_top_idx
)
2725 re_sub_match_top_t
*sub_top
= mctx
->sub_tops
[sub_top_idx
];
2726 re_sub_match_last_t
*sub_last
;
2727 int sub_last_idx
, sl_str
, bkref_str_off
;
2729 if (dfa
->nodes
[sub_top
->node
].opr
.idx
!= subexp_num
)
2730 continue; /* It isn't related. */
2732 sl_str
= sub_top
->str_idx
;
2733 bkref_str_off
= bkref_str_idx
;
2734 /* At first, check the last node of sub expressions we already
2736 for (sub_last_idx
= 0; sub_last_idx
< sub_top
->nlasts
; ++sub_last_idx
)
2739 sub_last
= sub_top
->lasts
[sub_last_idx
];
2740 sl_str_diff
= sub_last
->str_idx
- sl_str
;
2741 /* The matched string by the sub expression match with the substring
2742 at the back reference? */
2743 if (sl_str_diff
> 0)
2745 if (BE (bkref_str_off
+ sl_str_diff
> mctx
->input
.valid_len
, 0))
2747 /* Not enough chars for a successful match. */
2748 if (bkref_str_off
+ sl_str_diff
> mctx
->input
.len
)
2751 err
= clean_state_log_if_needed (mctx
,
2754 if (BE (err
!= REG_NOERROR
, 0))
2756 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2758 if (memcmp (buf
+ bkref_str_off
, buf
+ sl_str
, sl_str_diff
) != 0)
2759 /* We don't need to search this sub expression any more. */
2762 bkref_str_off
+= sl_str_diff
;
2763 sl_str
+= sl_str_diff
;
2764 err
= get_subexp_sub (mctx
, sub_top
, sub_last
, bkref_node
,
2767 /* Reload buf, since the preceding call might have reallocated
2769 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2771 if (err
== REG_NOMATCH
)
2773 if (BE (err
!= REG_NOERROR
, 0))
2777 if (sub_last_idx
< sub_top
->nlasts
)
2779 if (sub_last_idx
> 0)
2781 /* Then, search for the other last nodes of the sub expression. */
2782 for (; sl_str
<= bkref_str_idx
; ++sl_str
)
2784 int cls_node
, sl_str_off
;
2785 const re_node_set
*nodes
;
2786 sl_str_off
= sl_str
- sub_top
->str_idx
;
2787 /* The matched string by the sub expression match with the substring
2788 at the back reference? */
2791 if (BE (bkref_str_off
>= mctx
->input
.valid_len
, 0))
2793 /* If we are at the end of the input, we cannot match. */
2794 if (bkref_str_off
>= mctx
->input
.len
)
2797 err
= extend_buffers (mctx
, bkref_str_off
+ 1);
2798 if (BE (err
!= REG_NOERROR
, 0))
2801 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2803 if (buf
[bkref_str_off
++] != buf
[sl_str
- 1])
2804 break; /* We don't need to search this sub expression
2807 if (mctx
->state_log
[sl_str
] == NULL
)
2809 /* Does this state have a ')' of the sub expression? */
2810 nodes
= &mctx
->state_log
[sl_str
]->nodes
;
2811 cls_node
= find_subexp_node (dfa
, nodes
, subexp_num
,
2815 if (sub_top
->path
== NULL
)
2817 sub_top
->path
= calloc (sizeof (state_array_t
),
2818 sl_str
- sub_top
->str_idx
+ 1);
2819 if (sub_top
->path
== NULL
)
2822 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2823 in the current context? */
2824 err
= check_arrival (mctx
, sub_top
->path
, sub_top
->node
,
2825 sub_top
->str_idx
, cls_node
, sl_str
,
2827 if (err
== REG_NOMATCH
)
2829 if (BE (err
!= REG_NOERROR
, 0))
2831 sub_last
= match_ctx_add_sublast (sub_top
, cls_node
, sl_str
);
2832 if (BE (sub_last
== NULL
, 0))
2834 err
= get_subexp_sub (mctx
, sub_top
, sub_last
, bkref_node
,
2836 if (err
== REG_NOMATCH
)
2843 /* Helper functions for get_subexp(). */
2845 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2846 If it can arrive, register the sub expression expressed with SUB_TOP
2849 static reg_errcode_t
2851 get_subexp_sub (re_match_context_t
*mctx
, const re_sub_match_top_t
*sub_top
,
2852 re_sub_match_last_t
*sub_last
, int bkref_node
, int bkref_str
)
2856 /* Can the subexpression arrive the back reference? */
2857 err
= check_arrival (mctx
, &sub_last
->path
, sub_last
->node
,
2858 sub_last
->str_idx
, bkref_node
, bkref_str
,
2860 if (err
!= REG_NOERROR
)
2862 err
= match_ctx_add_entry (mctx
, bkref_node
, bkref_str
, sub_top
->str_idx
,
2864 if (BE (err
!= REG_NOERROR
, 0))
2866 to_idx
= bkref_str
+ sub_last
->str_idx
- sub_top
->str_idx
;
2867 return clean_state_log_if_needed (mctx
, to_idx
);
2870 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2871 Search '(' if FL_OPEN, or search ')' otherwise.
2872 TODO: This function isn't efficient...
2873 Because there might be more than one nodes whose types are
2874 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2880 find_subexp_node (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
2881 int subexp_idx
, int type
)
2884 for (cls_idx
= 0; cls_idx
< nodes
->nelem
; ++cls_idx
)
2886 int cls_node
= nodes
->elems
[cls_idx
];
2887 const re_token_t
*node
= dfa
->nodes
+ cls_node
;
2888 if (node
->type
== type
2889 && node
->opr
.idx
== subexp_idx
)
2895 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2896 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2898 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2900 static reg_errcode_t
2901 internal_function __attribute_warn_unused_result__
2902 check_arrival (re_match_context_t
*mctx
, state_array_t
*path
, int top_node
,
2903 int top_str
, int last_node
, int last_str
, int type
)
2905 const re_dfa_t
*const dfa
= mctx
->dfa
;
2906 reg_errcode_t err
= REG_NOERROR
;
2907 int subexp_num
, backup_cur_idx
, str_idx
, null_cnt
;
2908 re_dfastate_t
*cur_state
= NULL
;
2909 re_node_set
*cur_nodes
, next_nodes
;
2910 re_dfastate_t
**backup_state_log
;
2911 unsigned int context
;
2913 subexp_num
= dfa
->nodes
[top_node
].opr
.idx
;
2914 /* Extend the buffer if we need. */
2915 if (BE (path
->alloc
< last_str
+ mctx
->max_mb_elem_len
+ 1, 0))
2917 re_dfastate_t
**new_array
;
2918 int old_alloc
= path
->alloc
;
2919 path
->alloc
+= last_str
+ mctx
->max_mb_elem_len
+ 1;
2920 new_array
= re_realloc (path
->array
, re_dfastate_t
*, path
->alloc
);
2921 if (BE (new_array
== NULL
, 0))
2923 path
->alloc
= old_alloc
;
2926 path
->array
= new_array
;
2927 memset (new_array
+ old_alloc
, '\0',
2928 sizeof (re_dfastate_t
*) * (path
->alloc
- old_alloc
));
2931 str_idx
= path
->next_idx
?: top_str
;
2933 /* Temporary modify MCTX. */
2934 backup_state_log
= mctx
->state_log
;
2935 backup_cur_idx
= mctx
->input
.cur_idx
;
2936 mctx
->state_log
= path
->array
;
2937 mctx
->input
.cur_idx
= str_idx
;
2939 /* Setup initial node set. */
2940 context
= re_string_context_at (&mctx
->input
, str_idx
- 1, mctx
->eflags
);
2941 if (str_idx
== top_str
)
2943 err
= re_node_set_init_1 (&next_nodes
, top_node
);
2944 if (BE (err
!= REG_NOERROR
, 0))
2946 err
= check_arrival_expand_ecl (dfa
, &next_nodes
, subexp_num
, type
);
2947 if (BE (err
!= REG_NOERROR
, 0))
2949 re_node_set_free (&next_nodes
);
2955 cur_state
= mctx
->state_log
[str_idx
];
2956 if (cur_state
&& cur_state
->has_backref
)
2958 err
= re_node_set_init_copy (&next_nodes
, &cur_state
->nodes
);
2959 if (BE (err
!= REG_NOERROR
, 0))
2963 re_node_set_init_empty (&next_nodes
);
2965 if (str_idx
== top_str
|| (cur_state
&& cur_state
->has_backref
))
2967 if (next_nodes
.nelem
)
2969 err
= expand_bkref_cache (mctx
, &next_nodes
, str_idx
,
2971 if (BE (err
!= REG_NOERROR
, 0))
2973 re_node_set_free (&next_nodes
);
2977 cur_state
= re_acquire_state_context (&err
, dfa
, &next_nodes
, context
);
2978 if (BE (cur_state
== NULL
&& err
!= REG_NOERROR
, 0))
2980 re_node_set_free (&next_nodes
);
2983 mctx
->state_log
[str_idx
] = cur_state
;
2986 for (null_cnt
= 0; str_idx
< last_str
&& null_cnt
<= mctx
->max_mb_elem_len
;)
2988 re_node_set_empty (&next_nodes
);
2989 if (mctx
->state_log
[str_idx
+ 1])
2991 err
= re_node_set_merge (&next_nodes
,
2992 &mctx
->state_log
[str_idx
+ 1]->nodes
);
2993 if (BE (err
!= REG_NOERROR
, 0))
2995 re_node_set_free (&next_nodes
);
3001 err
= check_arrival_add_next_nodes (mctx
, str_idx
,
3002 &cur_state
->non_eps_nodes
,
3004 if (BE (err
!= REG_NOERROR
, 0))
3006 re_node_set_free (&next_nodes
);
3011 if (next_nodes
.nelem
)
3013 err
= check_arrival_expand_ecl (dfa
, &next_nodes
, subexp_num
, type
);
3014 if (BE (err
!= REG_NOERROR
, 0))
3016 re_node_set_free (&next_nodes
);
3019 err
= expand_bkref_cache (mctx
, &next_nodes
, str_idx
,
3021 if (BE (err
!= REG_NOERROR
, 0))
3023 re_node_set_free (&next_nodes
);
3027 context
= re_string_context_at (&mctx
->input
, str_idx
- 1, mctx
->eflags
);
3028 cur_state
= re_acquire_state_context (&err
, dfa
, &next_nodes
, context
);
3029 if (BE (cur_state
== NULL
&& err
!= REG_NOERROR
, 0))
3031 re_node_set_free (&next_nodes
);
3034 mctx
->state_log
[str_idx
] = cur_state
;
3035 null_cnt
= cur_state
== NULL
? null_cnt
+ 1 : 0;
3037 re_node_set_free (&next_nodes
);
3038 cur_nodes
= (mctx
->state_log
[last_str
] == NULL
? NULL
3039 : &mctx
->state_log
[last_str
]->nodes
);
3040 path
->next_idx
= str_idx
;
3043 mctx
->state_log
= backup_state_log
;
3044 mctx
->input
.cur_idx
= backup_cur_idx
;
3046 /* Then check the current node set has the node LAST_NODE. */
3047 if (cur_nodes
!= NULL
&& re_node_set_contains (cur_nodes
, last_node
))
3053 /* Helper functions for check_arrival. */
3055 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3057 TODO: This function is similar to the functions transit_state*(),
3058 however this function has many additional works.
3059 Can't we unify them? */
3061 static reg_errcode_t
3062 internal_function __attribute_warn_unused_result__
3063 check_arrival_add_next_nodes (re_match_context_t
*mctx
, int str_idx
,
3064 re_node_set
*cur_nodes
, re_node_set
*next_nodes
)
3066 const re_dfa_t
*const dfa
= mctx
->dfa
;
3069 reg_errcode_t err
= REG_NOERROR
;
3070 re_node_set union_set
;
3071 re_node_set_init_empty (&union_set
);
3072 for (cur_idx
= 0; cur_idx
< cur_nodes
->nelem
; ++cur_idx
)
3075 int cur_node
= cur_nodes
->elems
[cur_idx
];
3077 re_token_type_t type
= dfa
->nodes
[cur_node
].type
;
3078 assert (!IS_EPSILON_NODE (type
));
3080 #ifdef RE_ENABLE_I18N
3081 /* If the node may accept `multi byte'. */
3082 if (dfa
->nodes
[cur_node
].accept_mb
)
3084 naccepted
= check_node_accept_bytes (dfa
, cur_node
, &mctx
->input
,
3088 re_dfastate_t
*dest_state
;
3089 int next_node
= dfa
->nexts
[cur_node
];
3090 int next_idx
= str_idx
+ naccepted
;
3091 dest_state
= mctx
->state_log
[next_idx
];
3092 re_node_set_empty (&union_set
);
3095 err
= re_node_set_merge (&union_set
, &dest_state
->nodes
);
3096 if (BE (err
!= REG_NOERROR
, 0))
3098 re_node_set_free (&union_set
);
3102 result
= re_node_set_insert (&union_set
, next_node
);
3103 if (BE (result
< 0, 0))
3105 re_node_set_free (&union_set
);
3108 mctx
->state_log
[next_idx
] = re_acquire_state (&err
, dfa
,
3110 if (BE (mctx
->state_log
[next_idx
] == NULL
3111 && err
!= REG_NOERROR
, 0))
3113 re_node_set_free (&union_set
);
3118 #endif /* RE_ENABLE_I18N */
3120 || check_node_accept (mctx
, dfa
->nodes
+ cur_node
, str_idx
))
3122 result
= re_node_set_insert (next_nodes
, dfa
->nexts
[cur_node
]);
3123 if (BE (result
< 0, 0))
3125 re_node_set_free (&union_set
);
3130 re_node_set_free (&union_set
);
3134 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3135 CUR_NODES, however exclude the nodes which are:
3136 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3137 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3140 static reg_errcode_t
3142 check_arrival_expand_ecl (const re_dfa_t
*dfa
, re_node_set
*cur_nodes
,
3143 int ex_subexp
, int type
)
3146 int idx
, outside_node
;
3147 re_node_set new_nodes
;
3149 assert (cur_nodes
->nelem
);
3151 err
= re_node_set_alloc (&new_nodes
, cur_nodes
->nelem
);
3152 if (BE (err
!= REG_NOERROR
, 0))
3154 /* Create a new node set NEW_NODES with the nodes which are epsilon
3155 closures of the node in CUR_NODES. */
3157 for (idx
= 0; idx
< cur_nodes
->nelem
; ++idx
)
3159 int cur_node
= cur_nodes
->elems
[idx
];
3160 const re_node_set
*eclosure
= dfa
->eclosures
+ cur_node
;
3161 outside_node
= find_subexp_node (dfa
, eclosure
, ex_subexp
, type
);
3162 if (outside_node
== -1)
3164 /* There are no problematic nodes, just merge them. */
3165 err
= re_node_set_merge (&new_nodes
, eclosure
);
3166 if (BE (err
!= REG_NOERROR
, 0))
3168 re_node_set_free (&new_nodes
);
3174 /* There are problematic nodes, re-calculate incrementally. */
3175 err
= check_arrival_expand_ecl_sub (dfa
, &new_nodes
, cur_node
,
3177 if (BE (err
!= REG_NOERROR
, 0))
3179 re_node_set_free (&new_nodes
);
3184 re_node_set_free (cur_nodes
);
3185 *cur_nodes
= new_nodes
;
3189 /* Helper function for check_arrival_expand_ecl.
3190 Check incrementally the epsilon closure of TARGET, and if it isn't
3191 problematic append it to DST_NODES. */
3193 static reg_errcode_t
3194 internal_function __attribute_warn_unused_result__
3195 check_arrival_expand_ecl_sub (const re_dfa_t
*dfa
, re_node_set
*dst_nodes
,
3196 int target
, int ex_subexp
, int type
)
3199 for (cur_node
= target
; !re_node_set_contains (dst_nodes
, cur_node
);)
3203 if (dfa
->nodes
[cur_node
].type
== type
3204 && dfa
->nodes
[cur_node
].opr
.idx
== ex_subexp
)
3206 if (type
== OP_CLOSE_SUBEXP
)
3208 err
= re_node_set_insert (dst_nodes
, cur_node
);
3209 if (BE (err
== -1, 0))
3214 err
= re_node_set_insert (dst_nodes
, cur_node
);
3215 if (BE (err
== -1, 0))
3217 if (dfa
->edests
[cur_node
].nelem
== 0)
3219 if (dfa
->edests
[cur_node
].nelem
== 2)
3221 err
= check_arrival_expand_ecl_sub (dfa
, dst_nodes
,
3222 dfa
->edests
[cur_node
].elems
[1],
3224 if (BE (err
!= REG_NOERROR
, 0))
3227 cur_node
= dfa
->edests
[cur_node
].elems
[0];
3233 /* For all the back references in the current state, calculate the
3234 destination of the back references by the appropriate entry
3235 in MCTX->BKREF_ENTS. */
3237 static reg_errcode_t
3238 internal_function __attribute_warn_unused_result__
3239 expand_bkref_cache (re_match_context_t
*mctx
, re_node_set
*cur_nodes
,
3240 int cur_str
, int subexp_num
, int type
)
3242 const re_dfa_t
*const dfa
= mctx
->dfa
;
3244 int cache_idx_start
= search_cur_bkref_entry (mctx
, cur_str
);
3245 struct re_backref_cache_entry
*ent
;
3247 if (cache_idx_start
== -1)
3251 ent
= mctx
->bkref_ents
+ cache_idx_start
;
3254 int to_idx
, next_node
;
3256 /* Is this entry ENT is appropriate? */
3257 if (!re_node_set_contains (cur_nodes
, ent
->node
))
3260 to_idx
= cur_str
+ ent
->subexp_to
- ent
->subexp_from
;
3261 /* Calculate the destination of the back reference, and append it
3262 to MCTX->STATE_LOG. */
3263 if (to_idx
== cur_str
)
3265 /* The backreference did epsilon transit, we must re-check all the
3266 node in the current state. */
3267 re_node_set new_dests
;
3268 reg_errcode_t err2
, err3
;
3269 next_node
= dfa
->edests
[ent
->node
].elems
[0];
3270 if (re_node_set_contains (cur_nodes
, next_node
))
3272 err
= re_node_set_init_1 (&new_dests
, next_node
);
3273 err2
= check_arrival_expand_ecl (dfa
, &new_dests
, subexp_num
, type
);
3274 err3
= re_node_set_merge (cur_nodes
, &new_dests
);
3275 re_node_set_free (&new_dests
);
3276 if (BE (err
!= REG_NOERROR
|| err2
!= REG_NOERROR
3277 || err3
!= REG_NOERROR
, 0))
3279 err
= (err
!= REG_NOERROR
? err
3280 : (err2
!= REG_NOERROR
? err2
: err3
));
3283 /* TODO: It is still inefficient... */
3288 re_node_set union_set
;
3289 next_node
= dfa
->nexts
[ent
->node
];
3290 if (mctx
->state_log
[to_idx
])
3293 if (re_node_set_contains (&mctx
->state_log
[to_idx
]->nodes
,
3296 err
= re_node_set_init_copy (&union_set
,
3297 &mctx
->state_log
[to_idx
]->nodes
);
3298 ret
= re_node_set_insert (&union_set
, next_node
);
3299 if (BE (err
!= REG_NOERROR
|| ret
< 0, 0))
3301 re_node_set_free (&union_set
);
3302 err
= err
!= REG_NOERROR
? err
: REG_ESPACE
;
3308 err
= re_node_set_init_1 (&union_set
, next_node
);
3309 if (BE (err
!= REG_NOERROR
, 0))
3312 mctx
->state_log
[to_idx
] = re_acquire_state (&err
, dfa
, &union_set
);
3313 re_node_set_free (&union_set
);
3314 if (BE (mctx
->state_log
[to_idx
] == NULL
3315 && err
!= REG_NOERROR
, 0))
3319 while (ent
++->more
);
3323 /* Build transition table for the state.
3324 Return 1 if succeeded, otherwise return NULL. */
3328 build_trtable (const re_dfa_t
*dfa
, re_dfastate_t
*state
)
3331 int i
, j
, ch
, need_word_trtable
= 0;
3332 bitset_word_t elem
, mask
;
3333 bool dests_node_malloced
= false;
3334 bool dest_states_malloced
= false;
3335 int ndests
; /* Number of the destination states from `state'. */
3336 re_dfastate_t
**trtable
;
3337 re_dfastate_t
**dest_states
= NULL
, **dest_states_word
, **dest_states_nl
;
3338 re_node_set follows
, *dests_node
;
3340 bitset_t acceptable
;
3344 re_node_set dests_node
[SBC_MAX
];
3345 bitset_t dests_ch
[SBC_MAX
];
3348 /* We build DFA states which corresponds to the destination nodes
3349 from `state'. `dests_node[i]' represents the nodes which i-th
3350 destination state contains, and `dests_ch[i]' represents the
3351 characters which i-th destination state accepts. */
3352 if (__libc_use_alloca (sizeof (struct dests_alloc
)))
3353 dests_alloc
= (struct dests_alloc
*) alloca (sizeof (struct dests_alloc
));
3356 dests_alloc
= re_malloc (struct dests_alloc
, 1);
3357 if (BE (dests_alloc
== NULL
, 0))
3359 dests_node_malloced
= true;
3361 dests_node
= dests_alloc
->dests_node
;
3362 dests_ch
= dests_alloc
->dests_ch
;
3364 /* Initialize transiton table. */
3365 state
->word_trtable
= state
->trtable
= NULL
;
3367 /* At first, group all nodes belonging to `state' into several
3369 ndests
= group_nodes_into_DFAstates (dfa
, state
, dests_node
, dests_ch
);
3370 if (BE (ndests
<= 0, 0))
3372 if (dests_node_malloced
)
3374 /* Return 0 in case of an error, 1 otherwise. */
3377 state
->trtable
= (re_dfastate_t
**)
3378 calloc (sizeof (re_dfastate_t
*), SBC_MAX
);
3379 if (BE (state
->trtable
== NULL
, 0))
3386 err
= re_node_set_alloc (&follows
, ndests
+ 1);
3387 if (BE (err
!= REG_NOERROR
, 0))
3390 /* Avoid arithmetic overflow in size calculation. */
3391 if (BE ((((SIZE_MAX
- (sizeof (re_node_set
) + sizeof (bitset_t
)) * SBC_MAX
)
3392 / (3 * sizeof (re_dfastate_t
*)))
3397 if (__libc_use_alloca ((sizeof (re_node_set
) + sizeof (bitset_t
)) * SBC_MAX
3398 + ndests
* 3 * sizeof (re_dfastate_t
*)))
3399 dest_states
= (re_dfastate_t
**)
3400 alloca (ndests
* 3 * sizeof (re_dfastate_t
*));
3403 dest_states
= (re_dfastate_t
**)
3404 malloc (ndests
* 3 * sizeof (re_dfastate_t
*));
3405 if (BE (dest_states
== NULL
, 0))
3408 if (dest_states_malloced
)
3410 re_node_set_free (&follows
);
3411 for (i
= 0; i
< ndests
; ++i
)
3412 re_node_set_free (dests_node
+ i
);
3413 if (dests_node_malloced
)
3417 dest_states_malloced
= true;
3419 dest_states_word
= dest_states
+ ndests
;
3420 dest_states_nl
= dest_states_word
+ ndests
;
3421 bitset_empty (acceptable
);
3423 /* Then build the states for all destinations. */
3424 for (i
= 0; i
< ndests
; ++i
)
3427 re_node_set_empty (&follows
);
3428 /* Merge the follows of this destination states. */
3429 for (j
= 0; j
< dests_node
[i
].nelem
; ++j
)
3431 next_node
= dfa
->nexts
[dests_node
[i
].elems
[j
]];
3432 if (next_node
!= -1)
3434 err
= re_node_set_merge (&follows
, dfa
->eclosures
+ next_node
);
3435 if (BE (err
!= REG_NOERROR
, 0))
3439 dest_states
[i
] = re_acquire_state_context (&err
, dfa
, &follows
, 0);
3440 if (BE (dest_states
[i
] == NULL
&& err
!= REG_NOERROR
, 0))
3442 /* If the new state has context constraint,
3443 build appropriate states for these contexts. */
3444 if (dest_states
[i
]->has_constraint
)
3446 dest_states_word
[i
] = re_acquire_state_context (&err
, dfa
, &follows
,
3448 if (BE (dest_states_word
[i
] == NULL
&& err
!= REG_NOERROR
, 0))
3451 if (dest_states
[i
] != dest_states_word
[i
] && dfa
->mb_cur_max
> 1)
3452 need_word_trtable
= 1;
3454 dest_states_nl
[i
] = re_acquire_state_context (&err
, dfa
, &follows
,
3456 if (BE (dest_states_nl
[i
] == NULL
&& err
!= REG_NOERROR
, 0))
3461 dest_states_word
[i
] = dest_states
[i
];
3462 dest_states_nl
[i
] = dest_states
[i
];
3464 bitset_merge (acceptable
, dests_ch
[i
]);
3467 if (!BE (need_word_trtable
, 0))
3469 /* We don't care about whether the following character is a word
3470 character, or we are in a single-byte character set so we can
3471 discern by looking at the character code: allocate a
3472 256-entry transition table. */
3473 trtable
= state
->trtable
=
3474 (re_dfastate_t
**) calloc (sizeof (re_dfastate_t
*), SBC_MAX
);
3475 if (BE (trtable
== NULL
, 0))
3478 /* For all characters ch...: */
3479 for (i
= 0; i
< BITSET_WORDS
; ++i
)
3480 for (ch
= i
* BITSET_WORD_BITS
, elem
= acceptable
[i
], mask
= 1;
3482 mask
<<= 1, elem
>>= 1, ++ch
)
3483 if (BE (elem
& 1, 0))
3485 /* There must be exactly one destination which accepts
3486 character ch. See group_nodes_into_DFAstates. */
3487 for (j
= 0; (dests_ch
[j
][i
] & mask
) == 0; ++j
)
3490 /* j-th destination accepts the word character ch. */
3491 if (dfa
->word_char
[i
] & mask
)
3492 trtable
[ch
] = dest_states_word
[j
];
3494 trtable
[ch
] = dest_states
[j
];
3499 /* We care about whether the following character is a word
3500 character, and we are in a multi-byte character set: discern
3501 by looking at the character code: build two 256-entry
3502 transition tables, one starting at trtable[0] and one
3503 starting at trtable[SBC_MAX]. */
3504 trtable
= state
->word_trtable
=
3505 (re_dfastate_t
**) calloc (sizeof (re_dfastate_t
*), 2 * SBC_MAX
);
3506 if (BE (trtable
== NULL
, 0))
3509 /* For all characters ch...: */
3510 for (i
= 0; i
< BITSET_WORDS
; ++i
)
3511 for (ch
= i
* BITSET_WORD_BITS
, elem
= acceptable
[i
], mask
= 1;
3513 mask
<<= 1, elem
>>= 1, ++ch
)
3514 if (BE (elem
& 1, 0))
3516 /* There must be exactly one destination which accepts
3517 character ch. See group_nodes_into_DFAstates. */
3518 for (j
= 0; (dests_ch
[j
][i
] & mask
) == 0; ++j
)
3521 /* j-th destination accepts the word character ch. */
3522 trtable
[ch
] = dest_states
[j
];
3523 trtable
[ch
+ SBC_MAX
] = dest_states_word
[j
];
3528 if (bitset_contain (acceptable
, NEWLINE_CHAR
))
3530 /* The current state accepts newline character. */
3531 for (j
= 0; j
< ndests
; ++j
)
3532 if (bitset_contain (dests_ch
[j
], NEWLINE_CHAR
))
3534 /* k-th destination accepts newline character. */
3535 trtable
[NEWLINE_CHAR
] = dest_states_nl
[j
];
3536 if (need_word_trtable
)
3537 trtable
[NEWLINE_CHAR
+ SBC_MAX
] = dest_states_nl
[j
];
3538 /* There must be only one destination which accepts
3539 newline. See group_nodes_into_DFAstates. */
3544 if (dest_states_malloced
)
3547 re_node_set_free (&follows
);
3548 for (i
= 0; i
< ndests
; ++i
)
3549 re_node_set_free (dests_node
+ i
);
3551 if (dests_node_malloced
)
3557 /* Group all nodes belonging to STATE into several destinations.
3558 Then for all destinations, set the nodes belonging to the destination
3559 to DESTS_NODE[i] and set the characters accepted by the destination
3560 to DEST_CH[i]. This function return the number of destinations. */
3564 group_nodes_into_DFAstates (const re_dfa_t
*dfa
, const re_dfastate_t
*state
,
3565 re_node_set
*dests_node
, bitset_t
*dests_ch
)
3570 int ndests
; /* Number of the destinations from `state'. */
3571 bitset_t accepts
; /* Characters a node can accept. */
3572 const re_node_set
*cur_nodes
= &state
->nodes
;
3573 bitset_empty (accepts
);
3576 /* For all the nodes belonging to `state', */
3577 for (i
= 0; i
< cur_nodes
->nelem
; ++i
)
3579 re_token_t
*node
= &dfa
->nodes
[cur_nodes
->elems
[i
]];
3580 re_token_type_t type
= node
->type
;
3581 unsigned int constraint
= node
->constraint
;
3583 /* Enumerate all single byte character this node can accept. */
3584 if (type
== CHARACTER
)
3585 bitset_set (accepts
, node
->opr
.c
);
3586 else if (type
== SIMPLE_BRACKET
)
3588 bitset_merge (accepts
, node
->opr
.sbcset
);
3590 else if (type
== OP_PERIOD
)
3592 #ifdef RE_ENABLE_I18N
3593 if (dfa
->mb_cur_max
> 1)
3594 bitset_merge (accepts
, dfa
->sb_char
);
3597 bitset_set_all (accepts
);
3598 if (!(dfa
->syntax
& RE_DOT_NEWLINE
))
3599 bitset_clear (accepts
, '\n');
3600 if (dfa
->syntax
& RE_DOT_NOT_NULL
)
3601 bitset_clear (accepts
, '\0');
3603 #ifdef RE_ENABLE_I18N
3604 else if (type
== OP_UTF8_PERIOD
)
3606 memset (accepts
, '\xff', sizeof (bitset_t
) / 2);
3607 if (!(dfa
->syntax
& RE_DOT_NEWLINE
))
3608 bitset_clear (accepts
, '\n');
3609 if (dfa
->syntax
& RE_DOT_NOT_NULL
)
3610 bitset_clear (accepts
, '\0');
3616 /* Check the `accepts' and sift the characters which are not
3617 match it the context. */
3620 if (constraint
& NEXT_NEWLINE_CONSTRAINT
)
3622 bool accepts_newline
= bitset_contain (accepts
, NEWLINE_CHAR
);
3623 bitset_empty (accepts
);
3624 if (accepts_newline
)
3625 bitset_set (accepts
, NEWLINE_CHAR
);
3629 if (constraint
& NEXT_ENDBUF_CONSTRAINT
)
3631 bitset_empty (accepts
);
3635 if (constraint
& NEXT_WORD_CONSTRAINT
)
3637 bitset_word_t any_set
= 0;
3638 if (type
== CHARACTER
&& !node
->word_char
)
3640 bitset_empty (accepts
);
3643 #ifdef RE_ENABLE_I18N
3644 if (dfa
->mb_cur_max
> 1)
3645 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3646 any_set
|= (accepts
[j
] &= (dfa
->word_char
[j
] | ~dfa
->sb_char
[j
]));
3649 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3650 any_set
|= (accepts
[j
] &= dfa
->word_char
[j
]);
3654 if (constraint
& NEXT_NOTWORD_CONSTRAINT
)
3656 bitset_word_t any_set
= 0;
3657 if (type
== CHARACTER
&& node
->word_char
)
3659 bitset_empty (accepts
);
3662 #ifdef RE_ENABLE_I18N
3663 if (dfa
->mb_cur_max
> 1)
3664 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3665 any_set
|= (accepts
[j
] &= ~(dfa
->word_char
[j
] & dfa
->sb_char
[j
]));
3668 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3669 any_set
|= (accepts
[j
] &= ~dfa
->word_char
[j
]);
3675 /* Then divide `accepts' into DFA states, or create a new
3676 state. Above, we make sure that accepts is not empty. */
3677 for (j
= 0; j
< ndests
; ++j
)
3679 bitset_t intersec
; /* Intersection sets, see below. */
3681 /* Flags, see below. */
3682 bitset_word_t has_intersec
, not_subset
, not_consumed
;
3684 /* Optimization, skip if this state doesn't accept the character. */
3685 if (type
== CHARACTER
&& !bitset_contain (dests_ch
[j
], node
->opr
.c
))
3688 /* Enumerate the intersection set of this state and `accepts'. */
3690 for (k
= 0; k
< BITSET_WORDS
; ++k
)
3691 has_intersec
|= intersec
[k
] = accepts
[k
] & dests_ch
[j
][k
];
3692 /* And skip if the intersection set is empty. */
3696 /* Then check if this state is a subset of `accepts'. */
3697 not_subset
= not_consumed
= 0;
3698 for (k
= 0; k
< BITSET_WORDS
; ++k
)
3700 not_subset
|= remains
[k
] = ~accepts
[k
] & dests_ch
[j
][k
];
3701 not_consumed
|= accepts
[k
] = accepts
[k
] & ~dests_ch
[j
][k
];
3704 /* If this state isn't a subset of `accepts', create a
3705 new group state, which has the `remains'. */
3708 bitset_copy (dests_ch
[ndests
], remains
);
3709 bitset_copy (dests_ch
[j
], intersec
);
3710 err
= re_node_set_init_copy (dests_node
+ ndests
, &dests_node
[j
]);
3711 if (BE (err
!= REG_NOERROR
, 0))
3716 /* Put the position in the current group. */
3717 result
= re_node_set_insert (&dests_node
[j
], cur_nodes
->elems
[i
]);
3718 if (BE (result
< 0, 0))
3721 /* If all characters are consumed, go to next node. */
3725 /* Some characters remain, create a new group. */
3728 bitset_copy (dests_ch
[ndests
], accepts
);
3729 err
= re_node_set_init_1 (dests_node
+ ndests
, cur_nodes
->elems
[i
]);
3730 if (BE (err
!= REG_NOERROR
, 0))
3733 bitset_empty (accepts
);
3738 for (j
= 0; j
< ndests
; ++j
)
3739 re_node_set_free (dests_node
+ j
);
3743 #ifdef RE_ENABLE_I18N
3744 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3745 Return the number of the bytes the node accepts.
3746 STR_IDX is the current index of the input string.
3748 This function handles the nodes which can accept one character, or
3749 one collating element like '.', '[a-z]', opposite to the other nodes
3750 can only accept one byte. */
3753 # include <locale/weight.h>
3758 check_node_accept_bytes (const re_dfa_t
*dfa
, int node_idx
,
3759 const re_string_t
*input
, int str_idx
)
3761 const re_token_t
*node
= dfa
->nodes
+ node_idx
;
3762 int char_len
, elem_len
;
3765 if (BE (node
->type
== OP_UTF8_PERIOD
, 0))
3767 unsigned char c
= re_string_byte_at (input
, str_idx
), d
;
3768 if (BE (c
< 0xc2, 1))
3771 if (str_idx
+ 2 > input
->len
)
3774 d
= re_string_byte_at (input
, str_idx
+ 1);
3776 return (d
< 0x80 || d
> 0xbf) ? 0 : 2;
3780 if (c
== 0xe0 && d
< 0xa0)
3786 if (c
== 0xf0 && d
< 0x90)
3792 if (c
== 0xf8 && d
< 0x88)
3798 if (c
== 0xfc && d
< 0x84)
3804 if (str_idx
+ char_len
> input
->len
)
3807 for (i
= 1; i
< char_len
; ++i
)
3809 d
= re_string_byte_at (input
, str_idx
+ i
);
3810 if (d
< 0x80 || d
> 0xbf)
3816 char_len
= re_string_char_size_at (input
, str_idx
);
3817 if (node
->type
== OP_PERIOD
)
3821 /* FIXME: I don't think this if is needed, as both '\n'
3822 and '\0' are char_len == 1. */
3823 /* '.' accepts any one character except the following two cases. */
3824 if ((!(dfa
->syntax
& RE_DOT_NEWLINE
) &&
3825 re_string_byte_at (input
, str_idx
) == '\n') ||
3826 ((dfa
->syntax
& RE_DOT_NOT_NULL
) &&
3827 re_string_byte_at (input
, str_idx
) == '\0'))
3832 elem_len
= re_string_elem_size_at (input
, str_idx
);
3833 if ((elem_len
<= 1 && char_len
<= 1) || char_len
== 0)
3836 if (node
->type
== COMPLEX_BRACKET
)
3838 const re_charset_t
*cset
= node
->opr
.mbcset
;
3840 const unsigned char *pin
3841 = ((const unsigned char *) re_string_get_buffer (input
) + str_idx
);
3846 wchar_t wc
= ((cset
->nranges
|| cset
->nchar_classes
|| cset
->nmbchars
)
3847 ? re_string_wchar_at (input
, str_idx
) : 0);
3849 /* match with multibyte character? */
3850 for (i
= 0; i
< cset
->nmbchars
; ++i
)
3851 if (wc
== cset
->mbchars
[i
])
3853 match_len
= char_len
;
3854 goto check_node_accept_bytes_match
;
3856 /* match with character_class? */
3857 for (i
= 0; i
< cset
->nchar_classes
; ++i
)
3859 wctype_t wt
= cset
->char_classes
[i
];
3860 if (__iswctype (wc
, wt
))
3862 match_len
= char_len
;
3863 goto check_node_accept_bytes_match
;
3868 nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
3871 unsigned int in_collseq
= 0;
3872 const int32_t *table
, *indirect
;
3873 const unsigned char *weights
, *extra
;
3874 const char *collseqwc
;
3876 /* match with collating_symbol? */
3877 if (cset
->ncoll_syms
)
3878 extra
= (const unsigned char *)
3879 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
);
3880 for (i
= 0; i
< cset
->ncoll_syms
; ++i
)
3882 const unsigned char *coll_sym
= extra
+ cset
->coll_syms
[i
];
3883 /* Compare the length of input collating element and
3884 the length of current collating element. */
3885 if (*coll_sym
!= elem_len
)
3887 /* Compare each bytes. */
3888 for (j
= 0; j
< *coll_sym
; j
++)
3889 if (pin
[j
] != coll_sym
[1 + j
])
3893 /* Match if every bytes is equal. */
3895 goto check_node_accept_bytes_match
;
3901 if (elem_len
<= char_len
)
3903 collseqwc
= _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQWC
);
3904 in_collseq
= __collseq_table_lookup (collseqwc
, wc
);
3907 in_collseq
= find_collation_sequence_value (pin
, elem_len
);
3909 /* match with range expression? */
3910 for (i
= 0; i
< cset
->nranges
; ++i
)
3911 if (cset
->range_starts
[i
] <= in_collseq
3912 && in_collseq
<= cset
->range_ends
[i
])
3914 match_len
= elem_len
;
3915 goto check_node_accept_bytes_match
;
3918 /* match with equivalence_class? */
3919 if (cset
->nequiv_classes
)
3921 const unsigned char *cp
= pin
;
3922 table
= (const int32_t *)
3923 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
3924 weights
= (const unsigned char *)
3925 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTMB
);
3926 extra
= (const unsigned char *)
3927 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAMB
);
3928 indirect
= (const int32_t *)
3929 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTMB
);
3930 int32_t idx
= findidx (table
, indirect
, extra
, &cp
, elem_len
);
3932 for (i
= 0; i
< cset
->nequiv_classes
; ++i
)
3934 int32_t equiv_class_idx
= cset
->equiv_classes
[i
];
3935 size_t weight_len
= weights
[idx
& 0xffffff];
3936 if (weight_len
== weights
[equiv_class_idx
& 0xffffff]
3937 && (idx
>> 24) == (equiv_class_idx
>> 24))
3942 equiv_class_idx
&= 0xffffff;
3944 while (cnt
<= weight_len
3945 && (weights
[equiv_class_idx
+ 1 + cnt
]
3946 == weights
[idx
+ 1 + cnt
]))
3948 if (cnt
> weight_len
)
3950 match_len
= elem_len
;
3951 goto check_node_accept_bytes_match
;
3960 /* match with range expression? */
3962 wchar_t cmp_buf
[] = {L
'\0', L
'\0', wc
, L
'\0', L
'\0', L
'\0'};
3964 wchar_t cmp_buf
[] = {L
'\0', L
'\0', L
'\0', L
'\0', L
'\0', L
'\0'};
3967 for (i
= 0; i
< cset
->nranges
; ++i
)
3969 cmp_buf
[0] = cset
->range_starts
[i
];
3970 cmp_buf
[4] = cset
->range_ends
[i
];
3971 if (wcscoll (cmp_buf
, cmp_buf
+ 2) <= 0
3972 && wcscoll (cmp_buf
+ 2, cmp_buf
+ 4) <= 0)
3974 match_len
= char_len
;
3975 goto check_node_accept_bytes_match
;
3979 check_node_accept_bytes_match
:
3980 if (!cset
->non_match
)
3987 return (elem_len
> char_len
) ? elem_len
: char_len
;
3996 find_collation_sequence_value (const unsigned char *mbs
, size_t mbs_len
)
3998 uint32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
4003 /* No valid character. Match it as a single byte character. */
4004 const unsigned char *collseq
= (const unsigned char *)
4005 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQMB
);
4006 return collseq
[mbs
[0]];
4013 const unsigned char *extra
= (const unsigned char *)
4014 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
);
4015 int32_t extrasize
= (const unsigned char *)
4016 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
+ 1) - extra
;
4018 for (idx
= 0; idx
< extrasize
;)
4020 int mbs_cnt
, found
= 0;
4021 int32_t elem_mbs_len
;
4022 /* Skip the name of collating element name. */
4023 idx
= idx
+ extra
[idx
] + 1;
4024 elem_mbs_len
= extra
[idx
++];
4025 if (mbs_len
== elem_mbs_len
)
4027 for (mbs_cnt
= 0; mbs_cnt
< elem_mbs_len
; ++mbs_cnt
)
4028 if (extra
[idx
+ mbs_cnt
] != mbs
[mbs_cnt
])
4030 if (mbs_cnt
== elem_mbs_len
)
4031 /* Found the entry. */
4034 /* Skip the byte sequence of the collating element. */
4035 idx
+= elem_mbs_len
;
4036 /* Adjust for the alignment. */
4037 idx
= (idx
+ 3) & ~3;
4038 /* Skip the collation sequence value. */
4039 idx
+= sizeof (uint32_t);
4040 /* Skip the wide char sequence of the collating element. */
4041 idx
= idx
+ sizeof (uint32_t) * (*(int32_t *) (extra
+ idx
) + 1);
4042 /* If we found the entry, return the sequence value. */
4044 return *(uint32_t *) (extra
+ idx
);
4045 /* Skip the collation sequence value. */
4046 idx
+= sizeof (uint32_t);
4052 #endif /* RE_ENABLE_I18N */
4054 /* Check whether the node accepts the byte which is IDX-th
4055 byte of the INPUT. */
4059 check_node_accept (const re_match_context_t
*mctx
, const re_token_t
*node
,
4063 ch
= re_string_byte_at (&mctx
->input
, idx
);
4067 if (node
->opr
.c
!= ch
)
4071 case SIMPLE_BRACKET
:
4072 if (!bitset_contain (node
->opr
.sbcset
, ch
))
4076 #ifdef RE_ENABLE_I18N
4077 case OP_UTF8_PERIOD
:
4083 if ((ch
== '\n' && !(mctx
->dfa
->syntax
& RE_DOT_NEWLINE
))
4084 || (ch
== '\0' && (mctx
->dfa
->syntax
& RE_DOT_NOT_NULL
)))
4092 if (node
->constraint
)
4094 /* The node has constraints. Check whether the current context
4095 satisfies the constraints. */
4096 unsigned int context
= re_string_context_at (&mctx
->input
, idx
,
4098 if (NOT_SATISFY_NEXT_CONSTRAINT (node
->constraint
, context
))
4105 /* Extend the buffers, if the buffers have run out. */
4107 static reg_errcode_t
4108 internal_function __attribute_warn_unused_result__
4109 extend_buffers (re_match_context_t
*mctx
, int min_len
)
4112 re_string_t
*pstr
= &mctx
->input
;
4114 /* Avoid overflow. */
4115 if (BE (INT_MAX
/ 2 / sizeof (re_dfastate_t
*) <= pstr
->bufs_len
, 0))
4118 /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */
4119 ret
= re_string_realloc_buffers (pstr
,
4121 MIN (pstr
->len
, pstr
->bufs_len
* 2)));
4122 if (BE (ret
!= REG_NOERROR
, 0))
4125 if (mctx
->state_log
!= NULL
)
4127 /* And double the length of state_log. */
4128 /* XXX We have no indication of the size of this buffer. If this
4129 allocation fail we have no indication that the state_log array
4130 does not have the right size. */
4131 re_dfastate_t
**new_array
= re_realloc (mctx
->state_log
, re_dfastate_t
*,
4132 pstr
->bufs_len
+ 1);
4133 if (BE (new_array
== NULL
, 0))
4135 mctx
->state_log
= new_array
;
4138 /* Then reconstruct the buffers. */
4141 #ifdef RE_ENABLE_I18N
4142 if (pstr
->mb_cur_max
> 1)
4144 ret
= build_wcs_upper_buffer (pstr
);
4145 if (BE (ret
!= REG_NOERROR
, 0))
4149 #endif /* RE_ENABLE_I18N */
4150 build_upper_buffer (pstr
);
4154 #ifdef RE_ENABLE_I18N
4155 if (pstr
->mb_cur_max
> 1)
4156 build_wcs_buffer (pstr
);
4158 #endif /* RE_ENABLE_I18N */
4160 if (pstr
->trans
!= NULL
)
4161 re_string_translate_buffer (pstr
);
4168 /* Functions for matching context. */
4170 /* Initialize MCTX. */
4172 static reg_errcode_t
4173 internal_function __attribute_warn_unused_result__
4174 match_ctx_init (re_match_context_t
*mctx
, int eflags
, int n
)
4176 mctx
->eflags
= eflags
;
4177 mctx
->match_last
= -1;
4180 mctx
->bkref_ents
= re_malloc (struct re_backref_cache_entry
, n
);
4181 mctx
->sub_tops
= re_malloc (re_sub_match_top_t
*, n
);
4182 if (BE (mctx
->bkref_ents
== NULL
|| mctx
->sub_tops
== NULL
, 0))
4185 /* Already zero-ed by the caller.
4187 mctx->bkref_ents = NULL;
4188 mctx->nbkref_ents = 0;
4189 mctx->nsub_tops = 0; */
4190 mctx
->abkref_ents
= n
;
4191 mctx
->max_mb_elem_len
= 1;
4192 mctx
->asub_tops
= n
;
4196 /* Clean the entries which depend on the current input in MCTX.
4197 This function must be invoked when the matcher changes the start index
4198 of the input, or changes the input string. */
4202 match_ctx_clean (re_match_context_t
*mctx
)
4205 for (st_idx
= 0; st_idx
< mctx
->nsub_tops
; ++st_idx
)
4208 re_sub_match_top_t
*top
= mctx
->sub_tops
[st_idx
];
4209 for (sl_idx
= 0; sl_idx
< top
->nlasts
; ++sl_idx
)
4211 re_sub_match_last_t
*last
= top
->lasts
[sl_idx
];
4212 re_free (last
->path
.array
);
4215 re_free (top
->lasts
);
4218 re_free (top
->path
->array
);
4219 re_free (top
->path
);
4224 mctx
->nsub_tops
= 0;
4225 mctx
->nbkref_ents
= 0;
4228 /* Free all the memory associated with MCTX. */
4232 match_ctx_free (re_match_context_t
*mctx
)
4234 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4235 match_ctx_clean (mctx
);
4236 re_free (mctx
->sub_tops
);
4237 re_free (mctx
->bkref_ents
);
4240 /* Add a new backreference entry to MCTX.
4241 Note that we assume that caller never call this function with duplicate
4242 entry, and call with STR_IDX which isn't smaller than any existing entry.
4245 static reg_errcode_t
4246 internal_function __attribute_warn_unused_result__
4247 match_ctx_add_entry (re_match_context_t
*mctx
, int node
, int str_idx
, int from
,
4250 if (mctx
->nbkref_ents
>= mctx
->abkref_ents
)
4252 struct re_backref_cache_entry
* new_entry
;
4253 new_entry
= re_realloc (mctx
->bkref_ents
, struct re_backref_cache_entry
,
4254 mctx
->abkref_ents
* 2);
4255 if (BE (new_entry
== NULL
, 0))
4257 re_free (mctx
->bkref_ents
);
4260 mctx
->bkref_ents
= new_entry
;
4261 memset (mctx
->bkref_ents
+ mctx
->nbkref_ents
, '\0',
4262 sizeof (struct re_backref_cache_entry
) * mctx
->abkref_ents
);
4263 mctx
->abkref_ents
*= 2;
4265 if (mctx
->nbkref_ents
> 0
4266 && mctx
->bkref_ents
[mctx
->nbkref_ents
- 1].str_idx
== str_idx
)
4267 mctx
->bkref_ents
[mctx
->nbkref_ents
- 1].more
= 1;
4269 mctx
->bkref_ents
[mctx
->nbkref_ents
].node
= node
;
4270 mctx
->bkref_ents
[mctx
->nbkref_ents
].str_idx
= str_idx
;
4271 mctx
->bkref_ents
[mctx
->nbkref_ents
].subexp_from
= from
;
4272 mctx
->bkref_ents
[mctx
->nbkref_ents
].subexp_to
= to
;
4274 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4275 If bit N is clear, means that this entry won't epsilon-transition to
4276 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4277 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4280 A backreference does not epsilon-transition unless it is empty, so set
4281 to all zeros if FROM != TO. */
4282 mctx
->bkref_ents
[mctx
->nbkref_ents
].eps_reachable_subexps_map
4283 = (from
== to
? ~0 : 0);
4285 mctx
->bkref_ents
[mctx
->nbkref_ents
++].more
= 0;
4286 if (mctx
->max_mb_elem_len
< to
- from
)
4287 mctx
->max_mb_elem_len
= to
- from
;
4291 /* Search for the first entry which has the same str_idx, or -1 if none is
4292 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4296 search_cur_bkref_entry (const re_match_context_t
*mctx
, int str_idx
)
4298 int left
, right
, mid
, last
;
4299 last
= right
= mctx
->nbkref_ents
;
4300 for (left
= 0; left
< right
;)
4302 mid
= (left
+ right
) / 2;
4303 if (mctx
->bkref_ents
[mid
].str_idx
< str_idx
)
4308 if (left
< last
&& mctx
->bkref_ents
[left
].str_idx
== str_idx
)
4314 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4317 static reg_errcode_t
4318 internal_function __attribute_warn_unused_result__
4319 match_ctx_add_subtop (re_match_context_t
*mctx
, int node
, int str_idx
)
4322 assert (mctx
->sub_tops
!= NULL
);
4323 assert (mctx
->asub_tops
> 0);
4325 if (BE (mctx
->nsub_tops
== mctx
->asub_tops
, 0))
4327 int new_asub_tops
= mctx
->asub_tops
* 2;
4328 re_sub_match_top_t
**new_array
= re_realloc (mctx
->sub_tops
,
4329 re_sub_match_top_t
*,
4331 if (BE (new_array
== NULL
, 0))
4333 mctx
->sub_tops
= new_array
;
4334 mctx
->asub_tops
= new_asub_tops
;
4336 mctx
->sub_tops
[mctx
->nsub_tops
] = calloc (1, sizeof (re_sub_match_top_t
));
4337 if (BE (mctx
->sub_tops
[mctx
->nsub_tops
] == NULL
, 0))
4339 mctx
->sub_tops
[mctx
->nsub_tops
]->node
= node
;
4340 mctx
->sub_tops
[mctx
->nsub_tops
++]->str_idx
= str_idx
;
4344 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4345 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4347 static re_sub_match_last_t
*
4349 match_ctx_add_sublast (re_sub_match_top_t
*subtop
, int node
, int str_idx
)
4351 re_sub_match_last_t
*new_entry
;
4352 if (BE (subtop
->nlasts
== subtop
->alasts
, 0))
4354 int new_alasts
= 2 * subtop
->alasts
+ 1;
4355 re_sub_match_last_t
**new_array
= re_realloc (subtop
->lasts
,
4356 re_sub_match_last_t
*,
4358 if (BE (new_array
== NULL
, 0))
4360 subtop
->lasts
= new_array
;
4361 subtop
->alasts
= new_alasts
;
4363 new_entry
= calloc (1, sizeof (re_sub_match_last_t
));
4364 if (BE (new_entry
!= NULL
, 1))
4366 subtop
->lasts
[subtop
->nlasts
] = new_entry
;
4367 new_entry
->node
= node
;
4368 new_entry
->str_idx
= str_idx
;
4376 sift_ctx_init (re_sift_context_t
*sctx
, re_dfastate_t
**sifted_sts
,
4377 re_dfastate_t
**limited_sts
, int last_node
, int last_str_idx
)
4379 sctx
->sifted_states
= sifted_sts
;
4380 sctx
->limited_states
= limited_sts
;
4381 sctx
->last_node
= last_node
;
4382 sctx
->last_str_idx
= last_str_idx
;
4383 re_node_set_init_empty (&sctx
->limits
);