1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002-2019 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
20 static reg_errcode_t
match_ctx_init (re_match_context_t
*cache
, int eflags
,
22 static void match_ctx_clean (re_match_context_t
*mctx
);
23 static void match_ctx_free (re_match_context_t
*cache
);
24 static reg_errcode_t
match_ctx_add_entry (re_match_context_t
*cache
, Idx node
,
25 Idx str_idx
, Idx from
, Idx to
);
26 static Idx
search_cur_bkref_entry (const re_match_context_t
*mctx
, Idx str_idx
);
27 static reg_errcode_t
match_ctx_add_subtop (re_match_context_t
*mctx
, Idx node
,
29 static re_sub_match_last_t
* match_ctx_add_sublast (re_sub_match_top_t
*subtop
,
30 Idx node
, Idx str_idx
);
31 static void sift_ctx_init (re_sift_context_t
*sctx
, re_dfastate_t
**sifted_sts
,
32 re_dfastate_t
**limited_sts
, Idx last_node
,
34 static reg_errcode_t
re_search_internal (const regex_t
*preg
,
35 const char *string
, Idx length
,
36 Idx start
, Idx last_start
, Idx stop
,
37 size_t nmatch
, regmatch_t pmatch
[],
39 static regoff_t
re_search_2_stub (struct re_pattern_buffer
*bufp
,
40 const char *string1
, Idx length1
,
41 const char *string2
, Idx length2
,
42 Idx start
, regoff_t range
,
43 struct re_registers
*regs
,
44 Idx stop
, bool ret_len
);
45 static regoff_t
re_search_stub (struct re_pattern_buffer
*bufp
,
46 const char *string
, Idx length
, Idx start
,
47 regoff_t range
, Idx stop
,
48 struct re_registers
*regs
,
50 static unsigned re_copy_regs (struct re_registers
*regs
, regmatch_t
*pmatch
,
51 Idx nregs
, int regs_allocated
);
52 static reg_errcode_t
prune_impossible_nodes (re_match_context_t
*mctx
);
53 static Idx
check_matching (re_match_context_t
*mctx
, bool fl_longest_match
,
55 static Idx
check_halt_state_context (const re_match_context_t
*mctx
,
56 const re_dfastate_t
*state
, Idx idx
);
57 static void update_regs (const re_dfa_t
*dfa
, regmatch_t
*pmatch
,
58 regmatch_t
*prev_idx_match
, Idx cur_node
,
59 Idx cur_idx
, Idx nmatch
);
60 static reg_errcode_t
push_fail_stack (struct re_fail_stack_t
*fs
,
61 Idx str_idx
, Idx dest_node
, Idx nregs
,
63 re_node_set
*eps_via_nodes
);
64 static reg_errcode_t
set_regs (const regex_t
*preg
,
65 const re_match_context_t
*mctx
,
66 size_t nmatch
, regmatch_t
*pmatch
,
68 static reg_errcode_t
free_fail_stack_return (struct re_fail_stack_t
*fs
);
71 static int sift_states_iter_mb (const re_match_context_t
*mctx
,
72 re_sift_context_t
*sctx
,
73 Idx node_idx
, Idx str_idx
, Idx max_str_idx
);
74 #endif /* RE_ENABLE_I18N */
75 static reg_errcode_t
sift_states_backward (const re_match_context_t
*mctx
,
76 re_sift_context_t
*sctx
);
77 static reg_errcode_t
build_sifted_states (const re_match_context_t
*mctx
,
78 re_sift_context_t
*sctx
, Idx str_idx
,
79 re_node_set
*cur_dest
);
80 static reg_errcode_t
update_cur_sifted_state (const re_match_context_t
*mctx
,
81 re_sift_context_t
*sctx
,
83 re_node_set
*dest_nodes
);
84 static reg_errcode_t
add_epsilon_src_nodes (const re_dfa_t
*dfa
,
85 re_node_set
*dest_nodes
,
86 const re_node_set
*candidates
);
87 static bool check_dst_limits (const re_match_context_t
*mctx
,
88 const re_node_set
*limits
,
89 Idx dst_node
, Idx dst_idx
, Idx src_node
,
91 static int check_dst_limits_calc_pos_1 (const re_match_context_t
*mctx
,
92 int boundaries
, Idx subexp_idx
,
93 Idx from_node
, Idx bkref_idx
);
94 static int check_dst_limits_calc_pos (const re_match_context_t
*mctx
,
95 Idx limit
, Idx subexp_idx
,
96 Idx node
, Idx str_idx
,
98 static reg_errcode_t
check_subexp_limits (const re_dfa_t
*dfa
,
99 re_node_set
*dest_nodes
,
100 const re_node_set
*candidates
,
102 struct re_backref_cache_entry
*bkref_ents
,
104 static reg_errcode_t
sift_states_bkref (const re_match_context_t
*mctx
,
105 re_sift_context_t
*sctx
,
106 Idx str_idx
, const re_node_set
*candidates
);
107 static reg_errcode_t
merge_state_array (const re_dfa_t
*dfa
,
109 re_dfastate_t
**src
, Idx num
);
110 static re_dfastate_t
*find_recover_state (reg_errcode_t
*err
,
111 re_match_context_t
*mctx
);
112 static re_dfastate_t
*transit_state (reg_errcode_t
*err
,
113 re_match_context_t
*mctx
,
114 re_dfastate_t
*state
);
115 static re_dfastate_t
*merge_state_with_log (reg_errcode_t
*err
,
116 re_match_context_t
*mctx
,
117 re_dfastate_t
*next_state
);
118 static reg_errcode_t
check_subexp_matching_top (re_match_context_t
*mctx
,
119 re_node_set
*cur_nodes
,
122 static re_dfastate_t
*transit_state_sb (reg_errcode_t
*err
,
123 re_match_context_t
*mctx
,
124 re_dfastate_t
*pstate
);
126 #ifdef RE_ENABLE_I18N
127 static reg_errcode_t
transit_state_mb (re_match_context_t
*mctx
,
128 re_dfastate_t
*pstate
);
129 #endif /* RE_ENABLE_I18N */
130 static reg_errcode_t
transit_state_bkref (re_match_context_t
*mctx
,
131 const re_node_set
*nodes
);
132 static reg_errcode_t
get_subexp (re_match_context_t
*mctx
,
133 Idx bkref_node
, Idx bkref_str_idx
);
134 static reg_errcode_t
get_subexp_sub (re_match_context_t
*mctx
,
135 const re_sub_match_top_t
*sub_top
,
136 re_sub_match_last_t
*sub_last
,
137 Idx bkref_node
, Idx bkref_str
);
138 static Idx
find_subexp_node (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
139 Idx subexp_idx
, int type
);
140 static reg_errcode_t
check_arrival (re_match_context_t
*mctx
,
141 state_array_t
*path
, Idx top_node
,
142 Idx top_str
, Idx last_node
, Idx last_str
,
144 static reg_errcode_t
check_arrival_add_next_nodes (re_match_context_t
*mctx
,
146 re_node_set
*cur_nodes
,
147 re_node_set
*next_nodes
);
148 static reg_errcode_t
check_arrival_expand_ecl (const re_dfa_t
*dfa
,
149 re_node_set
*cur_nodes
,
150 Idx ex_subexp
, int type
);
151 static reg_errcode_t
check_arrival_expand_ecl_sub (const re_dfa_t
*dfa
,
152 re_node_set
*dst_nodes
,
153 Idx target
, Idx ex_subexp
,
155 static reg_errcode_t
expand_bkref_cache (re_match_context_t
*mctx
,
156 re_node_set
*cur_nodes
, Idx cur_str
,
157 Idx subexp_num
, int type
);
158 static bool build_trtable (const re_dfa_t
*dfa
, re_dfastate_t
*state
);
159 #ifdef RE_ENABLE_I18N
160 static int check_node_accept_bytes (const re_dfa_t
*dfa
, Idx node_idx
,
161 const re_string_t
*input
, Idx idx
);
163 static unsigned int find_collation_sequence_value (const unsigned char *mbs
,
166 #endif /* RE_ENABLE_I18N */
167 static Idx
group_nodes_into_DFAstates (const re_dfa_t
*dfa
,
168 const re_dfastate_t
*state
,
169 re_node_set
*states_node
,
170 bitset_t
*states_ch
);
171 static bool check_node_accept (const re_match_context_t
*mctx
,
172 const re_token_t
*node
, Idx idx
);
173 static reg_errcode_t
extend_buffers (re_match_context_t
*mctx
, int min_len
);
175 /* Entry point for POSIX code. */
177 /* regexec searches for a given pattern, specified by PREG, in the
180 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
182 least NMATCH elements, and we set them to the offsets of the
183 corresponding matched substrings.
185 EFLAGS specifies "execution flags" which affect matching: if
186 REG_NOTBOL is set, then ^ does not match at the beginning of the
187 string; if REG_NOTEOL is set, then $ does not match at the end.
189 We return 0 if we find a match and REG_NOMATCH if not. */
192 regexec (const regex_t
*__restrict preg
, const char *__restrict string
,
193 size_t nmatch
, regmatch_t pmatch
[], int eflags
)
197 re_dfa_t
*dfa
= preg
->buffer
;
199 if (eflags
& ~(REG_NOTBOL
| REG_NOTEOL
| REG_STARTEND
))
202 if (eflags
& REG_STARTEND
)
204 start
= pmatch
[0].rm_so
;
205 length
= pmatch
[0].rm_eo
;
210 length
= strlen (string
);
213 lock_lock (dfa
->lock
);
215 err
= re_search_internal (preg
, string
, length
, start
, length
,
216 length
, 0, NULL
, eflags
);
218 err
= re_search_internal (preg
, string
, length
, start
, length
,
219 length
, nmatch
, pmatch
, eflags
);
220 lock_unlock (dfa
->lock
);
221 return err
!= REG_NOERROR
;
225 libc_hidden_def (__regexec
)
227 # include <shlib-compat.h>
228 versioned_symbol (libc
, __regexec
, regexec
, GLIBC_2_3_4
);
230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231 __typeof__ (__regexec
) __compat_regexec
;
234 attribute_compat_text_section
235 __compat_regexec (const regex_t
*__restrict preg
,
236 const char *__restrict string
, size_t nmatch
,
237 regmatch_t pmatch
[], int eflags
)
239 return regexec (preg
, string
, nmatch
, pmatch
,
240 eflags
& (REG_NOTBOL
| REG_NOTEOL
));
242 compat_symbol (libc
, __compat_regexec
, regexec
, GLIBC_2_0
);
246 /* Entry points for GNU code. */
248 /* re_match, re_search, re_match_2, re_search_2
250 The former two functions operate on STRING with length LENGTH,
251 while the later two operate on concatenation of STRING1 and STRING2
252 with lengths LENGTH1 and LENGTH2, respectively.
254 re_match() matches the compiled pattern in BUFP against the string,
255 starting at index START.
257 re_search() first tries matching at index START, then it tries to match
258 starting from index START + 1, and so on. The last start position tried
259 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
262 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263 the first STOP characters of the concatenation of the strings should be
266 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267 and all groups is stored in REGS. (For the "_2" variants, the offsets are
268 computed relative to the concatenation, not relative to the individual
271 On success, re_match* functions return the length of the match, re_search*
272 return the position of the start of the match. Return value -1 means no
273 match was found and -2 indicates an internal error. */
276 re_match (struct re_pattern_buffer
*bufp
, const char *string
, Idx length
,
277 Idx start
, struct re_registers
*regs
)
279 return re_search_stub (bufp
, string
, length
, start
, 0, length
, regs
, true);
282 weak_alias (__re_match
, re_match
)
286 re_search (struct re_pattern_buffer
*bufp
, const char *string
, Idx length
,
287 Idx start
, regoff_t range
, struct re_registers
*regs
)
289 return re_search_stub (bufp
, string
, length
, start
, range
, length
, regs
,
293 weak_alias (__re_search
, re_search
)
297 re_match_2 (struct re_pattern_buffer
*bufp
, const char *string1
, Idx length1
,
298 const char *string2
, Idx length2
, Idx start
,
299 struct re_registers
*regs
, Idx stop
)
301 return re_search_2_stub (bufp
, string1
, length1
, string2
, length2
,
302 start
, 0, regs
, stop
, true);
305 weak_alias (__re_match_2
, re_match_2
)
309 re_search_2 (struct re_pattern_buffer
*bufp
, const char *string1
, Idx length1
,
310 const char *string2
, Idx length2
, Idx start
, regoff_t range
,
311 struct re_registers
*regs
, Idx stop
)
313 return re_search_2_stub (bufp
, string1
, length1
, string2
, length2
,
314 start
, range
, regs
, stop
, false);
317 weak_alias (__re_search_2
, re_search_2
)
321 re_search_2_stub (struct re_pattern_buffer
*bufp
, const char *string1
,
322 Idx length1
, const char *string2
, Idx length2
, Idx start
,
323 regoff_t range
, struct re_registers
*regs
,
324 Idx stop
, bool ret_len
)
331 if (__glibc_unlikely ((length1
< 0 || length2
< 0 || stop
< 0
332 || INT_ADD_WRAPV (length1
, length2
, &len
))))
335 /* Concatenate the strings. */
339 s
= re_malloc (char, len
);
341 if (__glibc_unlikely (s
== NULL
))
344 memcpy (__mempcpy (s
, string1
, length1
), string2
, length2
);
346 memcpy (s
, string1
, length1
);
347 memcpy (s
+ length1
, string2
, length2
);
356 rval
= re_search_stub (bufp
, str
, len
, start
, range
, stop
, regs
,
362 /* The parameters have the same meaning as those of re_search.
363 Additional parameters:
364 If RET_LEN is true the length of the match is returned (re_match style);
365 otherwise the position of the match is returned. */
368 re_search_stub (struct re_pattern_buffer
*bufp
, const char *string
, Idx length
,
369 Idx start
, regoff_t range
, Idx stop
, struct re_registers
*regs
,
372 reg_errcode_t result
;
377 re_dfa_t
*dfa
= bufp
->buffer
;
378 Idx last_start
= start
+ range
;
380 /* Check for out-of-range. */
381 if (__glibc_unlikely (start
< 0 || start
> length
))
383 if (__glibc_unlikely (length
< last_start
384 || (0 <= range
&& last_start
< start
)))
386 else if (__glibc_unlikely (last_start
< 0
387 || (range
< 0 && start
<= last_start
)))
390 lock_lock (dfa
->lock
);
392 eflags
|= (bufp
->not_bol
) ? REG_NOTBOL
: 0;
393 eflags
|= (bufp
->not_eol
) ? REG_NOTEOL
: 0;
395 /* Compile fastmap if we haven't yet. */
396 if (start
< last_start
&& bufp
->fastmap
!= NULL
&& !bufp
->fastmap_accurate
)
397 re_compile_fastmap (bufp
);
399 if (__glibc_unlikely (bufp
->no_sub
))
402 /* We need at least 1 register. */
405 else if (__glibc_unlikely (bufp
->regs_allocated
== REGS_FIXED
406 && regs
->num_regs
<= bufp
->re_nsub
))
408 nregs
= regs
->num_regs
;
409 if (__glibc_unlikely (nregs
< 1))
411 /* Nothing can be copied to regs. */
417 nregs
= bufp
->re_nsub
+ 1;
418 pmatch
= re_malloc (regmatch_t
, nregs
);
419 if (__glibc_unlikely (pmatch
== NULL
))
425 result
= re_search_internal (bufp
, string
, length
, start
, last_start
, stop
,
426 nregs
, pmatch
, eflags
);
430 /* I hope we needn't fill their regs with -1's when no match was found. */
431 if (result
!= REG_NOERROR
)
432 rval
= result
== REG_NOMATCH
? -1 : -2;
433 else if (regs
!= NULL
)
435 /* If caller wants register contents data back, copy them. */
436 bufp
->regs_allocated
= re_copy_regs (regs
, pmatch
, nregs
,
437 bufp
->regs_allocated
);
438 if (__glibc_unlikely (bufp
->regs_allocated
== REGS_UNALLOCATED
))
442 if (__glibc_likely (rval
== 0))
446 assert (pmatch
[0].rm_so
== start
);
447 rval
= pmatch
[0].rm_eo
- start
;
450 rval
= pmatch
[0].rm_so
;
454 lock_unlock (dfa
->lock
);
459 re_copy_regs (struct re_registers
*regs
, regmatch_t
*pmatch
, Idx nregs
,
462 int rval
= REGS_REALLOCATE
;
464 Idx need_regs
= nregs
+ 1;
465 /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
468 /* Have the register data arrays been allocated? */
469 if (regs_allocated
== REGS_UNALLOCATED
)
470 { /* No. So allocate them with malloc. */
471 regs
->start
= re_malloc (regoff_t
, need_regs
);
472 if (__glibc_unlikely (regs
->start
== NULL
))
473 return REGS_UNALLOCATED
;
474 regs
->end
= re_malloc (regoff_t
, need_regs
);
475 if (__glibc_unlikely (regs
->end
== NULL
))
477 re_free (regs
->start
);
478 return REGS_UNALLOCATED
;
480 regs
->num_regs
= need_regs
;
482 else if (regs_allocated
== REGS_REALLOCATE
)
483 { /* Yes. If we need more elements than were already
484 allocated, reallocate them. If we need fewer, just
486 if (__glibc_unlikely (need_regs
> regs
->num_regs
))
488 regoff_t
*new_start
= re_realloc (regs
->start
, regoff_t
, need_regs
);
490 if (__glibc_unlikely (new_start
== NULL
))
491 return REGS_UNALLOCATED
;
492 new_end
= re_realloc (regs
->end
, regoff_t
, need_regs
);
493 if (__glibc_unlikely (new_end
== NULL
))
496 return REGS_UNALLOCATED
;
498 regs
->start
= new_start
;
500 regs
->num_regs
= need_regs
;
505 assert (regs_allocated
== REGS_FIXED
);
506 /* This function may not be called with REGS_FIXED and nregs too big. */
507 assert (regs
->num_regs
>= nregs
);
512 for (i
= 0; i
< nregs
; ++i
)
514 regs
->start
[i
] = pmatch
[i
].rm_so
;
515 regs
->end
[i
] = pmatch
[i
].rm_eo
;
517 for ( ; i
< regs
->num_regs
; ++i
)
518 regs
->start
[i
] = regs
->end
[i
] = -1;
523 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
524 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
525 this memory for recording register information. STARTS and ENDS
526 must be allocated using the malloc library routine, and must each
527 be at least NUM_REGS * sizeof (regoff_t) bytes long.
529 If NUM_REGS == 0, then subsequent matches should allocate their own
532 Unless this function is called, the first search or match using
533 PATTERN_BUFFER will allocate its own register data, without
534 freeing the old data. */
537 re_set_registers (struct re_pattern_buffer
*bufp
, struct re_registers
*regs
,
538 __re_size_t num_regs
, regoff_t
*starts
, regoff_t
*ends
)
542 bufp
->regs_allocated
= REGS_REALLOCATE
;
543 regs
->num_regs
= num_regs
;
544 regs
->start
= starts
;
549 bufp
->regs_allocated
= REGS_UNALLOCATED
;
551 regs
->start
= regs
->end
= NULL
;
555 weak_alias (__re_set_registers
, re_set_registers
)
558 /* Entry points compatible with 4.2 BSD regex library. We don't define
559 them unless specifically requested. */
561 #if defined _REGEX_RE_COMP || defined _LIBC
566 re_exec (const char *s
)
568 return 0 == regexec (&re_comp_buf
, s
, 0, NULL
, 0);
570 #endif /* _REGEX_RE_COMP */
572 /* Internal entry point. */
574 /* Searches for a compiled pattern PREG in the string STRING, whose
575 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
576 meaning as with regexec. LAST_START is START + RANGE, where
577 START and RANGE have the same meaning as with re_search.
578 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
579 otherwise return the error code.
580 Note: We assume front end functions already check ranges.
581 (0 <= LAST_START && LAST_START <= LENGTH) */
584 __attribute_warn_unused_result__
585 re_search_internal (const regex_t
*preg
, const char *string
, Idx length
,
586 Idx start
, Idx last_start
, Idx stop
, size_t nmatch
,
587 regmatch_t pmatch
[], int eflags
)
590 const re_dfa_t
*dfa
= preg
->buffer
;
591 Idx left_lim
, right_lim
;
593 bool fl_longest_match
;
600 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
601 re_match_context_t mctx
= { .dfa
= dfa
};
603 re_match_context_t mctx
;
605 char *fastmap
= ((preg
->fastmap
!= NULL
&& preg
->fastmap_accurate
606 && start
!= last_start
&& !preg
->can_be_null
)
607 ? preg
->fastmap
: NULL
);
608 RE_TRANSLATE_TYPE t
= preg
->translate
;
610 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
611 memset (&mctx
, '\0', sizeof (re_match_context_t
));
615 extra_nmatch
= (nmatch
> preg
->re_nsub
) ? nmatch
- (preg
->re_nsub
+ 1) : 0;
616 nmatch
-= extra_nmatch
;
618 /* Check if the DFA haven't been compiled. */
619 if (__glibc_unlikely (preg
->used
== 0 || dfa
->init_state
== NULL
620 || dfa
->init_state_word
== NULL
621 || dfa
->init_state_nl
== NULL
622 || dfa
->init_state_begbuf
== NULL
))
626 /* We assume front-end functions already check them. */
627 assert (0 <= last_start
&& last_start
<= length
);
630 /* If initial states with non-begbuf contexts have no elements,
631 the regex must be anchored. If preg->newline_anchor is set,
632 we'll never use init_state_nl, so do not check it. */
633 if (dfa
->init_state
->nodes
.nelem
== 0
634 && dfa
->init_state_word
->nodes
.nelem
== 0
635 && (dfa
->init_state_nl
->nodes
.nelem
== 0
636 || !preg
->newline_anchor
))
638 if (start
!= 0 && last_start
!= 0)
640 start
= last_start
= 0;
643 /* We must check the longest matching, if nmatch > 0. */
644 fl_longest_match
= (nmatch
!= 0 || dfa
->nbackref
);
646 err
= re_string_allocate (&mctx
.input
, string
, length
, dfa
->nodes_len
+ 1,
647 preg
->translate
, (preg
->syntax
& RE_ICASE
) != 0,
649 if (__glibc_unlikely (err
!= REG_NOERROR
))
651 mctx
.input
.stop
= stop
;
652 mctx
.input
.raw_stop
= stop
;
653 mctx
.input
.newline_anchor
= preg
->newline_anchor
;
655 err
= match_ctx_init (&mctx
, eflags
, dfa
->nbackref
* 2);
656 if (__glibc_unlikely (err
!= REG_NOERROR
))
659 /* We will log all the DFA states through which the dfa pass,
660 if nmatch > 1, or this dfa has "multibyte node", which is a
661 back-reference or a node which can accept multibyte character or
662 multi character collating element. */
663 if (nmatch
> 1 || dfa
->has_mb_node
)
665 /* Avoid overflow. */
666 if (__glibc_unlikely ((MIN (IDX_MAX
, SIZE_MAX
/ sizeof (re_dfastate_t
*))
667 <= mctx
.input
.bufs_len
)))
673 mctx
.state_log
= re_malloc (re_dfastate_t
*, mctx
.input
.bufs_len
+ 1);
674 if (__glibc_unlikely (mctx
.state_log
== NULL
))
681 mctx
.state_log
= NULL
;
684 mctx
.input
.tip_context
= (eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
685 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
;
687 /* Check incrementally whether the input string matches. */
688 incr
= (last_start
< start
) ? -1 : 1;
689 left_lim
= (last_start
< start
) ? last_start
: start
;
690 right_lim
= (last_start
< start
) ? start
: last_start
;
691 sb
= dfa
->mb_cur_max
== 1;
694 ? ((sb
|| !(preg
->syntax
& RE_ICASE
|| t
) ? 4 : 0)
695 | (start
<= last_start
? 2 : 0)
696 | (t
!= NULL
? 1 : 0))
699 for (;; match_first
+= incr
)
702 if (match_first
< left_lim
|| right_lim
< match_first
)
705 /* Advance as rapidly as possible through the string, until we
706 find a plausible place to start matching. This may be done
707 with varying efficiency, so there are various possibilities:
708 only the most common of them are specialized, in order to
709 save on code size. We use a switch statement for speed. */
717 /* Fastmap with single-byte translation, match forward. */
718 while (__glibc_likely (match_first
< right_lim
)
719 && !fastmap
[t
[(unsigned char) string
[match_first
]]])
721 goto forward_match_found_start_or_reached_end
;
724 /* Fastmap without translation, match forward. */
725 while (__glibc_likely (match_first
< right_lim
)
726 && !fastmap
[(unsigned char) string
[match_first
]])
729 forward_match_found_start_or_reached_end
:
730 if (__glibc_unlikely (match_first
== right_lim
))
732 ch
= match_first
>= length
733 ? 0 : (unsigned char) string
[match_first
];
734 if (!fastmap
[t
? t
[ch
] : ch
])
741 /* Fastmap without multi-byte translation, match backwards. */
742 while (match_first
>= left_lim
)
744 ch
= match_first
>= length
745 ? 0 : (unsigned char) string
[match_first
];
746 if (fastmap
[t
? t
[ch
] : ch
])
750 if (match_first
< left_lim
)
755 /* In this case, we can't determine easily the current byte,
756 since it might be a component byte of a multibyte
757 character. Then we use the constructed buffer instead. */
760 /* If MATCH_FIRST is out of the valid range, reconstruct the
762 __re_size_t offset
= match_first
- mctx
.input
.raw_mbs_idx
;
763 if (__glibc_unlikely (offset
764 >= (__re_size_t
) mctx
.input
.valid_raw_len
))
766 err
= re_string_reconstruct (&mctx
.input
, match_first
,
768 if (__glibc_unlikely (err
!= REG_NOERROR
))
771 offset
= match_first
- mctx
.input
.raw_mbs_idx
;
773 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
774 Note that MATCH_FIRST must not be smaller than 0. */
775 ch
= (match_first
>= length
776 ? 0 : re_string_byte_at (&mctx
.input
, offset
));
780 if (match_first
< left_lim
|| match_first
> right_lim
)
789 /* Reconstruct the buffers so that the matcher can assume that
790 the matching starts from the beginning of the buffer. */
791 err
= re_string_reconstruct (&mctx
.input
, match_first
, eflags
);
792 if (__glibc_unlikely (err
!= REG_NOERROR
))
795 #ifdef RE_ENABLE_I18N
796 /* Don't consider this char as a possible match start if it part,
797 yet isn't the head, of a multibyte character. */
798 if (!sb
&& !re_string_first_byte (&mctx
.input
, 0))
802 /* It seems to be appropriate one, then use the matcher. */
803 /* We assume that the matching starts from 0. */
804 mctx
.state_log_top
= mctx
.nbkref_ents
= mctx
.max_mb_elem_len
= 0;
805 match_last
= check_matching (&mctx
, fl_longest_match
,
806 start
<= last_start
? &match_first
: NULL
);
807 if (match_last
!= -1)
809 if (__glibc_unlikely (match_last
== -2))
816 mctx
.match_last
= match_last
;
817 if ((!preg
->no_sub
&& nmatch
> 1) || dfa
->nbackref
)
819 re_dfastate_t
*pstate
= mctx
.state_log
[match_last
];
820 mctx
.last_node
= check_halt_state_context (&mctx
, pstate
,
823 if ((!preg
->no_sub
&& nmatch
> 1 && dfa
->has_plural_match
)
826 err
= prune_impossible_nodes (&mctx
);
827 if (err
== REG_NOERROR
)
829 if (__glibc_unlikely (err
!= REG_NOMATCH
))
834 break; /* We found a match. */
838 match_ctx_clean (&mctx
);
842 assert (match_last
!= -1);
843 assert (err
== REG_NOERROR
);
846 /* Set pmatch[] if we need. */
851 /* Initialize registers. */
852 for (reg_idx
= 1; reg_idx
< nmatch
; ++reg_idx
)
853 pmatch
[reg_idx
].rm_so
= pmatch
[reg_idx
].rm_eo
= -1;
855 /* Set the points where matching start/end. */
857 pmatch
[0].rm_eo
= mctx
.match_last
;
858 /* FIXME: This function should fail if mctx.match_last exceeds
859 the maximum possible regoff_t value. We need a new error
860 code REG_OVERFLOW. */
862 if (!preg
->no_sub
&& nmatch
> 1)
864 err
= set_regs (preg
, &mctx
, nmatch
, pmatch
,
865 dfa
->has_plural_match
&& dfa
->nbackref
> 0);
866 if (__glibc_unlikely (err
!= REG_NOERROR
))
870 /* At last, add the offset to each register, since we slid
871 the buffers so that we could assume that the matching starts
873 for (reg_idx
= 0; reg_idx
< nmatch
; ++reg_idx
)
874 if (pmatch
[reg_idx
].rm_so
!= -1)
876 #ifdef RE_ENABLE_I18N
877 if (__glibc_unlikely (mctx
.input
.offsets_needed
!= 0))
879 pmatch
[reg_idx
].rm_so
=
880 (pmatch
[reg_idx
].rm_so
== mctx
.input
.valid_len
881 ? mctx
.input
.valid_raw_len
882 : mctx
.input
.offsets
[pmatch
[reg_idx
].rm_so
]);
883 pmatch
[reg_idx
].rm_eo
=
884 (pmatch
[reg_idx
].rm_eo
== mctx
.input
.valid_len
885 ? mctx
.input
.valid_raw_len
886 : mctx
.input
.offsets
[pmatch
[reg_idx
].rm_eo
]);
889 assert (mctx
.input
.offsets_needed
== 0);
891 pmatch
[reg_idx
].rm_so
+= match_first
;
892 pmatch
[reg_idx
].rm_eo
+= match_first
;
894 for (reg_idx
= 0; reg_idx
< extra_nmatch
; ++reg_idx
)
896 pmatch
[nmatch
+ reg_idx
].rm_so
= -1;
897 pmatch
[nmatch
+ reg_idx
].rm_eo
= -1;
901 for (reg_idx
= 0; reg_idx
+ 1 < nmatch
; reg_idx
++)
902 if (dfa
->subexp_map
[reg_idx
] != reg_idx
)
904 pmatch
[reg_idx
+ 1].rm_so
905 = pmatch
[dfa
->subexp_map
[reg_idx
] + 1].rm_so
;
906 pmatch
[reg_idx
+ 1].rm_eo
907 = pmatch
[dfa
->subexp_map
[reg_idx
] + 1].rm_eo
;
912 re_free (mctx
.state_log
);
914 match_ctx_free (&mctx
);
915 re_string_destruct (&mctx
.input
);
920 __attribute_warn_unused_result__
921 prune_impossible_nodes (re_match_context_t
*mctx
)
923 const re_dfa_t
*const dfa
= mctx
->dfa
;
924 Idx halt_node
, match_last
;
926 re_dfastate_t
**sifted_states
;
927 re_dfastate_t
**lim_states
= NULL
;
928 re_sift_context_t sctx
;
930 assert (mctx
->state_log
!= NULL
);
932 match_last
= mctx
->match_last
;
933 halt_node
= mctx
->last_node
;
935 /* Avoid overflow. */
936 if (__glibc_unlikely (MIN (IDX_MAX
, SIZE_MAX
/ sizeof (re_dfastate_t
*))
940 sifted_states
= re_malloc (re_dfastate_t
*, match_last
+ 1);
941 if (__glibc_unlikely (sifted_states
== NULL
))
948 lim_states
= re_malloc (re_dfastate_t
*, match_last
+ 1);
949 if (__glibc_unlikely (lim_states
== NULL
))
956 memset (lim_states
, '\0',
957 sizeof (re_dfastate_t
*) * (match_last
+ 1));
958 sift_ctx_init (&sctx
, sifted_states
, lim_states
, halt_node
,
960 ret
= sift_states_backward (mctx
, &sctx
);
961 re_node_set_free (&sctx
.limits
);
962 if (__glibc_unlikely (ret
!= REG_NOERROR
))
964 if (sifted_states
[0] != NULL
|| lim_states
[0] != NULL
)
974 } while (mctx
->state_log
[match_last
] == NULL
975 || !mctx
->state_log
[match_last
]->halt
);
976 halt_node
= check_halt_state_context (mctx
,
977 mctx
->state_log
[match_last
],
980 ret
= merge_state_array (dfa
, sifted_states
, lim_states
,
982 re_free (lim_states
);
984 if (__glibc_unlikely (ret
!= REG_NOERROR
))
989 sift_ctx_init (&sctx
, sifted_states
, lim_states
, halt_node
, match_last
);
990 ret
= sift_states_backward (mctx
, &sctx
);
991 re_node_set_free (&sctx
.limits
);
992 if (__glibc_unlikely (ret
!= REG_NOERROR
))
994 if (sifted_states
[0] == NULL
)
1000 re_free (mctx
->state_log
);
1001 mctx
->state_log
= sifted_states
;
1002 sifted_states
= NULL
;
1003 mctx
->last_node
= halt_node
;
1004 mctx
->match_last
= match_last
;
1007 re_free (sifted_states
);
1008 re_free (lim_states
);
1012 /* Acquire an initial state and return it.
1013 We must select appropriate initial state depending on the context,
1014 since initial states may have constraints like "\<", "^", etc.. */
1016 static inline re_dfastate_t
*
1017 __attribute__ ((always_inline
))
1018 acquire_init_state_context (reg_errcode_t
*err
, const re_match_context_t
*mctx
,
1021 const re_dfa_t
*const dfa
= mctx
->dfa
;
1022 if (dfa
->init_state
->has_constraint
)
1024 unsigned int context
;
1025 context
= re_string_context_at (&mctx
->input
, idx
- 1, mctx
->eflags
);
1026 if (IS_WORD_CONTEXT (context
))
1027 return dfa
->init_state_word
;
1028 else if (IS_ORDINARY_CONTEXT (context
))
1029 return dfa
->init_state
;
1030 else if (IS_BEGBUF_CONTEXT (context
) && IS_NEWLINE_CONTEXT (context
))
1031 return dfa
->init_state_begbuf
;
1032 else if (IS_NEWLINE_CONTEXT (context
))
1033 return dfa
->init_state_nl
;
1034 else if (IS_BEGBUF_CONTEXT (context
))
1036 /* It is relatively rare case, then calculate on demand. */
1037 return re_acquire_state_context (err
, dfa
,
1038 dfa
->init_state
->entrance_nodes
,
1042 /* Must not happen? */
1043 return dfa
->init_state
;
1046 return dfa
->init_state
;
1049 /* Check whether the regular expression match input string INPUT or not,
1050 and return the index where the matching end. Return -1 if
1051 there is no match, and return -2 in case of an error.
1052 FL_LONGEST_MATCH means we want the POSIX longest matching.
1053 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1054 next place where we may want to try matching.
1055 Note that the matcher assumes that the matching starts from the current
1056 index of the buffer. */
1059 __attribute_warn_unused_result__
1060 check_matching (re_match_context_t
*mctx
, bool fl_longest_match
,
1063 const re_dfa_t
*const dfa
= mctx
->dfa
;
1066 Idx match_last
= -1;
1067 Idx cur_str_idx
= re_string_cur_idx (&mctx
->input
);
1068 re_dfastate_t
*cur_state
;
1069 bool at_init_state
= p_match_first
!= NULL
;
1070 Idx next_start_idx
= cur_str_idx
;
1073 cur_state
= acquire_init_state_context (&err
, mctx
, cur_str_idx
);
1074 /* An initial state must not be NULL (invalid). */
1075 if (__glibc_unlikely (cur_state
== NULL
))
1077 assert (err
== REG_ESPACE
);
1081 if (mctx
->state_log
!= NULL
)
1083 mctx
->state_log
[cur_str_idx
] = cur_state
;
1085 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1086 later. E.g. Processing back references. */
1087 if (__glibc_unlikely (dfa
->nbackref
))
1089 at_init_state
= false;
1090 err
= check_subexp_matching_top (mctx
, &cur_state
->nodes
, 0);
1091 if (__glibc_unlikely (err
!= REG_NOERROR
))
1094 if (cur_state
->has_backref
)
1096 err
= transit_state_bkref (mctx
, &cur_state
->nodes
);
1097 if (__glibc_unlikely (err
!= REG_NOERROR
))
1103 /* If the RE accepts NULL string. */
1104 if (__glibc_unlikely (cur_state
->halt
))
1106 if (!cur_state
->has_constraint
1107 || check_halt_state_context (mctx
, cur_state
, cur_str_idx
))
1109 if (!fl_longest_match
)
1113 match_last
= cur_str_idx
;
1119 while (!re_string_eoi (&mctx
->input
))
1121 re_dfastate_t
*old_state
= cur_state
;
1122 Idx next_char_idx
= re_string_cur_idx (&mctx
->input
) + 1;
1124 if ((__glibc_unlikely (next_char_idx
>= mctx
->input
.bufs_len
)
1125 && mctx
->input
.bufs_len
< mctx
->input
.len
)
1126 || (__glibc_unlikely (next_char_idx
>= mctx
->input
.valid_len
)
1127 && mctx
->input
.valid_len
< mctx
->input
.len
))
1129 err
= extend_buffers (mctx
, next_char_idx
+ 1);
1130 if (__glibc_unlikely (err
!= REG_NOERROR
))
1132 assert (err
== REG_ESPACE
);
1137 cur_state
= transit_state (&err
, mctx
, cur_state
);
1138 if (mctx
->state_log
!= NULL
)
1139 cur_state
= merge_state_with_log (&err
, mctx
, cur_state
);
1141 if (cur_state
== NULL
)
1143 /* Reached the invalid state or an error. Try to recover a valid
1144 state using the state log, if available and if we have not
1145 already found a valid (even if not the longest) match. */
1146 if (__glibc_unlikely (err
!= REG_NOERROR
))
1149 if (mctx
->state_log
== NULL
1150 || (match
&& !fl_longest_match
)
1151 || (cur_state
= find_recover_state (&err
, mctx
)) == NULL
)
1155 if (__glibc_unlikely (at_init_state
))
1157 if (old_state
== cur_state
)
1158 next_start_idx
= next_char_idx
;
1160 at_init_state
= false;
1163 if (cur_state
->halt
)
1165 /* Reached a halt state.
1166 Check the halt state can satisfy the current context. */
1167 if (!cur_state
->has_constraint
1168 || check_halt_state_context (mctx
, cur_state
,
1169 re_string_cur_idx (&mctx
->input
)))
1171 /* We found an appropriate halt state. */
1172 match_last
= re_string_cur_idx (&mctx
->input
);
1175 /* We found a match, do not modify match_first below. */
1176 p_match_first
= NULL
;
1177 if (!fl_longest_match
)
1184 *p_match_first
+= next_start_idx
;
1189 /* Check NODE match the current context. */
1192 check_halt_node_context (const re_dfa_t
*dfa
, Idx node
, unsigned int context
)
1194 re_token_type_t type
= dfa
->nodes
[node
].type
;
1195 unsigned int constraint
= dfa
->nodes
[node
].constraint
;
1196 if (type
!= END_OF_RE
)
1200 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint
, context
))
1205 /* Check the halt state STATE match the current context.
1206 Return 0 if not match, if the node, STATE has, is a halt node and
1207 match the context, return the node. */
1210 check_halt_state_context (const re_match_context_t
*mctx
,
1211 const re_dfastate_t
*state
, Idx idx
)
1214 unsigned int context
;
1216 assert (state
->halt
);
1218 context
= re_string_context_at (&mctx
->input
, idx
, mctx
->eflags
);
1219 for (i
= 0; i
< state
->nodes
.nelem
; ++i
)
1220 if (check_halt_node_context (mctx
->dfa
, state
->nodes
.elems
[i
], context
))
1221 return state
->nodes
.elems
[i
];
1225 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1226 corresponding to the DFA).
1227 Return the destination node, and update EPS_VIA_NODES;
1228 return -1 in case of errors. */
1231 proceed_next_node (const re_match_context_t
*mctx
, Idx nregs
, regmatch_t
*regs
,
1232 Idx
*pidx
, Idx node
, re_node_set
*eps_via_nodes
,
1233 struct re_fail_stack_t
*fs
)
1235 const re_dfa_t
*const dfa
= mctx
->dfa
;
1238 if (IS_EPSILON_NODE (dfa
->nodes
[node
].type
))
1240 re_node_set
*cur_nodes
= &mctx
->state_log
[*pidx
]->nodes
;
1241 re_node_set
*edests
= &dfa
->edests
[node
];
1243 ok
= re_node_set_insert (eps_via_nodes
, node
);
1244 if (__glibc_unlikely (! ok
))
1246 /* Pick up a valid destination, or return -1 if none
1248 for (dest_node
= -1, i
= 0; i
< edests
->nelem
; ++i
)
1250 Idx candidate
= edests
->elems
[i
];
1251 if (!re_node_set_contains (cur_nodes
, candidate
))
1253 if (dest_node
== -1)
1254 dest_node
= candidate
;
1258 /* In order to avoid infinite loop like "(a*)*", return the second
1259 epsilon-transition if the first was already considered. */
1260 if (re_node_set_contains (eps_via_nodes
, dest_node
))
1263 /* Otherwise, push the second epsilon-transition on the fail stack. */
1265 && push_fail_stack (fs
, *pidx
, candidate
, nregs
, regs
,
1269 /* We know we are going to exit. */
1278 re_token_type_t type
= dfa
->nodes
[node
].type
;
1280 #ifdef RE_ENABLE_I18N
1281 if (dfa
->nodes
[node
].accept_mb
)
1282 naccepted
= check_node_accept_bytes (dfa
, node
, &mctx
->input
, *pidx
);
1284 #endif /* RE_ENABLE_I18N */
1285 if (type
== OP_BACK_REF
)
1287 Idx subexp_idx
= dfa
->nodes
[node
].opr
.idx
+ 1;
1288 naccepted
= regs
[subexp_idx
].rm_eo
- regs
[subexp_idx
].rm_so
;
1291 if (regs
[subexp_idx
].rm_so
== -1 || regs
[subexp_idx
].rm_eo
== -1)
1295 char *buf
= (char *) re_string_get_buffer (&mctx
->input
);
1296 if (memcmp (buf
+ regs
[subexp_idx
].rm_so
, buf
+ *pidx
,
1305 ok
= re_node_set_insert (eps_via_nodes
, node
);
1306 if (__glibc_unlikely (! ok
))
1308 dest_node
= dfa
->edests
[node
].elems
[0];
1309 if (re_node_set_contains (&mctx
->state_log
[*pidx
]->nodes
,
1316 || check_node_accept (mctx
, dfa
->nodes
+ node
, *pidx
))
1318 Idx dest_node
= dfa
->nexts
[node
];
1319 *pidx
= (naccepted
== 0) ? *pidx
+ 1 : *pidx
+ naccepted
;
1320 if (fs
&& (*pidx
> mctx
->match_last
|| mctx
->state_log
[*pidx
] == NULL
1321 || !re_node_set_contains (&mctx
->state_log
[*pidx
]->nodes
,
1324 re_node_set_empty (eps_via_nodes
);
1331 static reg_errcode_t
1332 __attribute_warn_unused_result__
1333 push_fail_stack (struct re_fail_stack_t
*fs
, Idx str_idx
, Idx dest_node
,
1334 Idx nregs
, regmatch_t
*regs
, re_node_set
*eps_via_nodes
)
1337 Idx num
= fs
->num
++;
1338 if (fs
->num
== fs
->alloc
)
1340 struct re_fail_stack_ent_t
*new_array
;
1341 new_array
= re_realloc (fs
->stack
, struct re_fail_stack_ent_t
,
1343 if (new_array
== NULL
)
1346 fs
->stack
= new_array
;
1348 fs
->stack
[num
].idx
= str_idx
;
1349 fs
->stack
[num
].node
= dest_node
;
1350 fs
->stack
[num
].regs
= re_malloc (regmatch_t
, nregs
);
1351 if (fs
->stack
[num
].regs
== NULL
)
1353 memcpy (fs
->stack
[num
].regs
, regs
, sizeof (regmatch_t
) * nregs
);
1354 err
= re_node_set_init_copy (&fs
->stack
[num
].eps_via_nodes
, eps_via_nodes
);
1359 pop_fail_stack (struct re_fail_stack_t
*fs
, Idx
*pidx
, Idx nregs
,
1360 regmatch_t
*regs
, re_node_set
*eps_via_nodes
)
1362 Idx num
= --fs
->num
;
1364 *pidx
= fs
->stack
[num
].idx
;
1365 memcpy (regs
, fs
->stack
[num
].regs
, sizeof (regmatch_t
) * nregs
);
1366 re_node_set_free (eps_via_nodes
);
1367 re_free (fs
->stack
[num
].regs
);
1368 *eps_via_nodes
= fs
->stack
[num
].eps_via_nodes
;
1369 return fs
->stack
[num
].node
;
1372 /* Set the positions where the subexpressions are starts/ends to registers
1374 Note: We assume that pmatch[0] is already set, and
1375 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1377 static reg_errcode_t
1378 __attribute_warn_unused_result__
1379 set_regs (const regex_t
*preg
, const re_match_context_t
*mctx
, size_t nmatch
,
1380 regmatch_t
*pmatch
, bool fl_backtrack
)
1382 const re_dfa_t
*dfa
= preg
->buffer
;
1384 re_node_set eps_via_nodes
;
1385 struct re_fail_stack_t
*fs
;
1386 struct re_fail_stack_t fs_body
= { 0, 2, NULL
};
1387 regmatch_t
*prev_idx_match
;
1388 bool prev_idx_match_malloced
= false;
1391 assert (nmatch
> 1);
1392 assert (mctx
->state_log
!= NULL
);
1397 fs
->stack
= re_malloc (struct re_fail_stack_ent_t
, fs
->alloc
);
1398 if (fs
->stack
== NULL
)
1404 cur_node
= dfa
->init_node
;
1405 re_node_set_init_empty (&eps_via_nodes
);
1407 if (__libc_use_alloca (nmatch
* sizeof (regmatch_t
)))
1408 prev_idx_match
= (regmatch_t
*) alloca (nmatch
* sizeof (regmatch_t
));
1411 prev_idx_match
= re_malloc (regmatch_t
, nmatch
);
1412 if (prev_idx_match
== NULL
)
1414 free_fail_stack_return (fs
);
1417 prev_idx_match_malloced
= true;
1419 memcpy (prev_idx_match
, pmatch
, sizeof (regmatch_t
) * nmatch
);
1421 for (idx
= pmatch
[0].rm_so
; idx
<= pmatch
[0].rm_eo
;)
1423 update_regs (dfa
, pmatch
, prev_idx_match
, cur_node
, idx
, nmatch
);
1425 if (idx
== pmatch
[0].rm_eo
&& cur_node
== mctx
->last_node
)
1430 for (reg_idx
= 0; reg_idx
< nmatch
; ++reg_idx
)
1431 if (pmatch
[reg_idx
].rm_so
> -1 && pmatch
[reg_idx
].rm_eo
== -1)
1433 if (reg_idx
== nmatch
)
1435 re_node_set_free (&eps_via_nodes
);
1436 if (prev_idx_match_malloced
)
1437 re_free (prev_idx_match
);
1438 return free_fail_stack_return (fs
);
1440 cur_node
= pop_fail_stack (fs
, &idx
, nmatch
, pmatch
,
1445 re_node_set_free (&eps_via_nodes
);
1446 if (prev_idx_match_malloced
)
1447 re_free (prev_idx_match
);
1452 /* Proceed to next node. */
1453 cur_node
= proceed_next_node (mctx
, nmatch
, pmatch
, &idx
, cur_node
,
1454 &eps_via_nodes
, fs
);
1456 if (__glibc_unlikely (cur_node
< 0))
1458 if (__glibc_unlikely (cur_node
== -2))
1460 re_node_set_free (&eps_via_nodes
);
1461 if (prev_idx_match_malloced
)
1462 re_free (prev_idx_match
);
1463 free_fail_stack_return (fs
);
1467 cur_node
= pop_fail_stack (fs
, &idx
, nmatch
, pmatch
,
1471 re_node_set_free (&eps_via_nodes
);
1472 if (prev_idx_match_malloced
)
1473 re_free (prev_idx_match
);
1478 re_node_set_free (&eps_via_nodes
);
1479 if (prev_idx_match_malloced
)
1480 re_free (prev_idx_match
);
1481 return free_fail_stack_return (fs
);
1484 static reg_errcode_t
1485 free_fail_stack_return (struct re_fail_stack_t
*fs
)
1490 for (fs_idx
= 0; fs_idx
< fs
->num
; ++fs_idx
)
1492 re_node_set_free (&fs
->stack
[fs_idx
].eps_via_nodes
);
1493 re_free (fs
->stack
[fs_idx
].regs
);
1495 re_free (fs
->stack
);
1501 update_regs (const re_dfa_t
*dfa
, regmatch_t
*pmatch
,
1502 regmatch_t
*prev_idx_match
, Idx cur_node
, Idx cur_idx
, Idx nmatch
)
1504 int type
= dfa
->nodes
[cur_node
].type
;
1505 if (type
== OP_OPEN_SUBEXP
)
1507 Idx reg_num
= dfa
->nodes
[cur_node
].opr
.idx
+ 1;
1509 /* We are at the first node of this sub expression. */
1510 if (reg_num
< nmatch
)
1512 pmatch
[reg_num
].rm_so
= cur_idx
;
1513 pmatch
[reg_num
].rm_eo
= -1;
1516 else if (type
== OP_CLOSE_SUBEXP
)
1518 Idx reg_num
= dfa
->nodes
[cur_node
].opr
.idx
+ 1;
1519 if (reg_num
< nmatch
)
1521 /* We are at the last node of this sub expression. */
1522 if (pmatch
[reg_num
].rm_so
< cur_idx
)
1524 pmatch
[reg_num
].rm_eo
= cur_idx
;
1525 /* This is a non-empty match or we are not inside an optional
1526 subexpression. Accept this right away. */
1527 memcpy (prev_idx_match
, pmatch
, sizeof (regmatch_t
) * nmatch
);
1531 if (dfa
->nodes
[cur_node
].opt_subexp
1532 && prev_idx_match
[reg_num
].rm_so
!= -1)
1533 /* We transited through an empty match for an optional
1534 subexpression, like (a?)*, and this is not the subexp's
1535 first match. Copy back the old content of the registers
1536 so that matches of an inner subexpression are undone as
1537 well, like in ((a?))*. */
1538 memcpy (pmatch
, prev_idx_match
, sizeof (regmatch_t
) * nmatch
);
1540 /* We completed a subexpression, but it may be part of
1541 an optional one, so do not update PREV_IDX_MATCH. */
1542 pmatch
[reg_num
].rm_eo
= cur_idx
;
1548 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1549 and sift the nodes in each states according to the following rules.
1550 Updated state_log will be wrote to STATE_LOG.
1552 Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1553 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1554 If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1555 the LAST_NODE, we throw away the node 'a'.
1556 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1557 string 's' and transit to 'b':
1558 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1560 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1561 thrown away, we throw away the node 'a'.
1562 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1563 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1565 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1566 we throw away the node 'a'. */
1568 #define STATE_NODE_CONTAINS(state,node) \
1569 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1571 static reg_errcode_t
1572 sift_states_backward (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
)
1576 Idx str_idx
= sctx
->last_str_idx
;
1577 re_node_set cur_dest
;
1580 assert (mctx
->state_log
!= NULL
&& mctx
->state_log
[str_idx
] != NULL
);
1583 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1584 transit to the last_node and the last_node itself. */
1585 err
= re_node_set_init_1 (&cur_dest
, sctx
->last_node
);
1586 if (__glibc_unlikely (err
!= REG_NOERROR
))
1588 err
= update_cur_sifted_state (mctx
, sctx
, str_idx
, &cur_dest
);
1589 if (__glibc_unlikely (err
!= REG_NOERROR
))
1592 /* Then check each states in the state_log. */
1595 /* Update counters. */
1596 null_cnt
= (sctx
->sifted_states
[str_idx
] == NULL
) ? null_cnt
+ 1 : 0;
1597 if (null_cnt
> mctx
->max_mb_elem_len
)
1599 memset (sctx
->sifted_states
, '\0',
1600 sizeof (re_dfastate_t
*) * str_idx
);
1601 re_node_set_free (&cur_dest
);
1604 re_node_set_empty (&cur_dest
);
1607 if (mctx
->state_log
[str_idx
])
1609 err
= build_sifted_states (mctx
, sctx
, str_idx
, &cur_dest
);
1610 if (__glibc_unlikely (err
!= REG_NOERROR
))
1614 /* Add all the nodes which satisfy the following conditions:
1615 - It can epsilon transit to a node in CUR_DEST.
1617 And update state_log. */
1618 err
= update_cur_sifted_state (mctx
, sctx
, str_idx
, &cur_dest
);
1619 if (__glibc_unlikely (err
!= REG_NOERROR
))
1624 re_node_set_free (&cur_dest
);
1628 static reg_errcode_t
1629 __attribute_warn_unused_result__
1630 build_sifted_states (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
1631 Idx str_idx
, re_node_set
*cur_dest
)
1633 const re_dfa_t
*const dfa
= mctx
->dfa
;
1634 const re_node_set
*cur_src
= &mctx
->state_log
[str_idx
]->non_eps_nodes
;
1637 /* Then build the next sifted state.
1638 We build the next sifted state on 'cur_dest', and update
1639 'sifted_states[str_idx]' with 'cur_dest'.
1641 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1642 'cur_src' points the node_set of the old 'state_log[str_idx]'
1643 (with the epsilon nodes pre-filtered out). */
1644 for (i
= 0; i
< cur_src
->nelem
; i
++)
1646 Idx prev_node
= cur_src
->elems
[i
];
1651 re_token_type_t type
= dfa
->nodes
[prev_node
].type
;
1652 assert (!IS_EPSILON_NODE (type
));
1654 #ifdef RE_ENABLE_I18N
1655 /* If the node may accept "multi byte". */
1656 if (dfa
->nodes
[prev_node
].accept_mb
)
1657 naccepted
= sift_states_iter_mb (mctx
, sctx
, prev_node
,
1658 str_idx
, sctx
->last_str_idx
);
1659 #endif /* RE_ENABLE_I18N */
1661 /* We don't check backreferences here.
1662 See update_cur_sifted_state(). */
1664 && check_node_accept (mctx
, dfa
->nodes
+ prev_node
, str_idx
)
1665 && STATE_NODE_CONTAINS (sctx
->sifted_states
[str_idx
+ 1],
1666 dfa
->nexts
[prev_node
]))
1672 if (sctx
->limits
.nelem
)
1674 Idx to_idx
= str_idx
+ naccepted
;
1675 if (check_dst_limits (mctx
, &sctx
->limits
,
1676 dfa
->nexts
[prev_node
], to_idx
,
1677 prev_node
, str_idx
))
1680 ok
= re_node_set_insert (cur_dest
, prev_node
);
1681 if (__glibc_unlikely (! ok
))
1688 /* Helper functions. */
1690 static reg_errcode_t
1691 clean_state_log_if_needed (re_match_context_t
*mctx
, Idx next_state_log_idx
)
1693 Idx top
= mctx
->state_log_top
;
1695 if ((next_state_log_idx
>= mctx
->input
.bufs_len
1696 && mctx
->input
.bufs_len
< mctx
->input
.len
)
1697 || (next_state_log_idx
>= mctx
->input
.valid_len
1698 && mctx
->input
.valid_len
< mctx
->input
.len
))
1701 err
= extend_buffers (mctx
, next_state_log_idx
+ 1);
1702 if (__glibc_unlikely (err
!= REG_NOERROR
))
1706 if (top
< next_state_log_idx
)
1708 memset (mctx
->state_log
+ top
+ 1, '\0',
1709 sizeof (re_dfastate_t
*) * (next_state_log_idx
- top
));
1710 mctx
->state_log_top
= next_state_log_idx
;
1715 static reg_errcode_t
1716 merge_state_array (const re_dfa_t
*dfa
, re_dfastate_t
**dst
,
1717 re_dfastate_t
**src
, Idx num
)
1721 for (st_idx
= 0; st_idx
< num
; ++st_idx
)
1723 if (dst
[st_idx
] == NULL
)
1724 dst
[st_idx
] = src
[st_idx
];
1725 else if (src
[st_idx
] != NULL
)
1727 re_node_set merged_set
;
1728 err
= re_node_set_init_union (&merged_set
, &dst
[st_idx
]->nodes
,
1729 &src
[st_idx
]->nodes
);
1730 if (__glibc_unlikely (err
!= REG_NOERROR
))
1732 dst
[st_idx
] = re_acquire_state (&err
, dfa
, &merged_set
);
1733 re_node_set_free (&merged_set
);
1734 if (__glibc_unlikely (err
!= REG_NOERROR
))
1741 static reg_errcode_t
1742 update_cur_sifted_state (const re_match_context_t
*mctx
,
1743 re_sift_context_t
*sctx
, Idx str_idx
,
1744 re_node_set
*dest_nodes
)
1746 const re_dfa_t
*const dfa
= mctx
->dfa
;
1747 reg_errcode_t err
= REG_NOERROR
;
1748 const re_node_set
*candidates
;
1749 candidates
= ((mctx
->state_log
[str_idx
] == NULL
) ? NULL
1750 : &mctx
->state_log
[str_idx
]->nodes
);
1752 if (dest_nodes
->nelem
== 0)
1753 sctx
->sifted_states
[str_idx
] = NULL
;
1758 /* At first, add the nodes which can epsilon transit to a node in
1760 err
= add_epsilon_src_nodes (dfa
, dest_nodes
, candidates
);
1761 if (__glibc_unlikely (err
!= REG_NOERROR
))
1764 /* Then, check the limitations in the current sift_context. */
1765 if (sctx
->limits
.nelem
)
1767 err
= check_subexp_limits (dfa
, dest_nodes
, candidates
, &sctx
->limits
,
1768 mctx
->bkref_ents
, str_idx
);
1769 if (__glibc_unlikely (err
!= REG_NOERROR
))
1774 sctx
->sifted_states
[str_idx
] = re_acquire_state (&err
, dfa
, dest_nodes
);
1775 if (__glibc_unlikely (err
!= REG_NOERROR
))
1779 if (candidates
&& mctx
->state_log
[str_idx
]->has_backref
)
1781 err
= sift_states_bkref (mctx
, sctx
, str_idx
, candidates
);
1782 if (__glibc_unlikely (err
!= REG_NOERROR
))
1788 static reg_errcode_t
1789 __attribute_warn_unused_result__
1790 add_epsilon_src_nodes (const re_dfa_t
*dfa
, re_node_set
*dest_nodes
,
1791 const re_node_set
*candidates
)
1793 reg_errcode_t err
= REG_NOERROR
;
1796 re_dfastate_t
*state
= re_acquire_state (&err
, dfa
, dest_nodes
);
1797 if (__glibc_unlikely (err
!= REG_NOERROR
))
1800 if (!state
->inveclosure
.alloc
)
1802 err
= re_node_set_alloc (&state
->inveclosure
, dest_nodes
->nelem
);
1803 if (__glibc_unlikely (err
!= REG_NOERROR
))
1805 for (i
= 0; i
< dest_nodes
->nelem
; i
++)
1807 err
= re_node_set_merge (&state
->inveclosure
,
1808 dfa
->inveclosures
+ dest_nodes
->elems
[i
]);
1809 if (__glibc_unlikely (err
!= REG_NOERROR
))
1813 return re_node_set_add_intersect (dest_nodes
, candidates
,
1814 &state
->inveclosure
);
1817 static reg_errcode_t
1818 sub_epsilon_src_nodes (const re_dfa_t
*dfa
, Idx node
, re_node_set
*dest_nodes
,
1819 const re_node_set
*candidates
)
1823 re_node_set
*inv_eclosure
= dfa
->inveclosures
+ node
;
1824 re_node_set except_nodes
;
1825 re_node_set_init_empty (&except_nodes
);
1826 for (ecl_idx
= 0; ecl_idx
< inv_eclosure
->nelem
; ++ecl_idx
)
1828 Idx cur_node
= inv_eclosure
->elems
[ecl_idx
];
1829 if (cur_node
== node
)
1831 if (IS_EPSILON_NODE (dfa
->nodes
[cur_node
].type
))
1833 Idx edst1
= dfa
->edests
[cur_node
].elems
[0];
1834 Idx edst2
= ((dfa
->edests
[cur_node
].nelem
> 1)
1835 ? dfa
->edests
[cur_node
].elems
[1] : -1);
1836 if ((!re_node_set_contains (inv_eclosure
, edst1
)
1837 && re_node_set_contains (dest_nodes
, edst1
))
1839 && !re_node_set_contains (inv_eclosure
, edst2
)
1840 && re_node_set_contains (dest_nodes
, edst2
)))
1842 err
= re_node_set_add_intersect (&except_nodes
, candidates
,
1843 dfa
->inveclosures
+ cur_node
);
1844 if (__glibc_unlikely (err
!= REG_NOERROR
))
1846 re_node_set_free (&except_nodes
);
1852 for (ecl_idx
= 0; ecl_idx
< inv_eclosure
->nelem
; ++ecl_idx
)
1854 Idx cur_node
= inv_eclosure
->elems
[ecl_idx
];
1855 if (!re_node_set_contains (&except_nodes
, cur_node
))
1857 Idx idx
= re_node_set_contains (dest_nodes
, cur_node
) - 1;
1858 re_node_set_remove_at (dest_nodes
, idx
);
1861 re_node_set_free (&except_nodes
);
1866 check_dst_limits (const re_match_context_t
*mctx
, const re_node_set
*limits
,
1867 Idx dst_node
, Idx dst_idx
, Idx src_node
, Idx src_idx
)
1869 const re_dfa_t
*const dfa
= mctx
->dfa
;
1870 Idx lim_idx
, src_pos
, dst_pos
;
1872 Idx dst_bkref_idx
= search_cur_bkref_entry (mctx
, dst_idx
);
1873 Idx src_bkref_idx
= search_cur_bkref_entry (mctx
, src_idx
);
1874 for (lim_idx
= 0; lim_idx
< limits
->nelem
; ++lim_idx
)
1877 struct re_backref_cache_entry
*ent
;
1878 ent
= mctx
->bkref_ents
+ limits
->elems
[lim_idx
];
1879 subexp_idx
= dfa
->nodes
[ent
->node
].opr
.idx
;
1881 dst_pos
= check_dst_limits_calc_pos (mctx
, limits
->elems
[lim_idx
],
1882 subexp_idx
, dst_node
, dst_idx
,
1884 src_pos
= check_dst_limits_calc_pos (mctx
, limits
->elems
[lim_idx
],
1885 subexp_idx
, src_node
, src_idx
,
1889 <src> <dst> ( <subexp> )
1890 ( <subexp> ) <src> <dst>
1891 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1892 if (src_pos
== dst_pos
)
1893 continue; /* This is unrelated limitation. */
1901 check_dst_limits_calc_pos_1 (const re_match_context_t
*mctx
, int boundaries
,
1902 Idx subexp_idx
, Idx from_node
, Idx bkref_idx
)
1904 const re_dfa_t
*const dfa
= mctx
->dfa
;
1905 const re_node_set
*eclosures
= dfa
->eclosures
+ from_node
;
1908 /* Else, we are on the boundary: examine the nodes on the epsilon
1910 for (node_idx
= 0; node_idx
< eclosures
->nelem
; ++node_idx
)
1912 Idx node
= eclosures
->elems
[node_idx
];
1913 switch (dfa
->nodes
[node
].type
)
1916 if (bkref_idx
!= -1)
1918 struct re_backref_cache_entry
*ent
= mctx
->bkref_ents
+ bkref_idx
;
1924 if (ent
->node
!= node
)
1927 if (subexp_idx
< BITSET_WORD_BITS
1928 && !(ent
->eps_reachable_subexps_map
1929 & ((bitset_word_t
) 1 << subexp_idx
)))
1932 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1933 OP_CLOSE_SUBEXP cases below. But, if the
1934 destination node is the same node as the source
1935 node, don't recurse because it would cause an
1936 infinite loop: a regex that exhibits this behavior
1938 dst
= dfa
->edests
[node
].elems
[0];
1939 if (dst
== from_node
)
1943 else /* if (boundaries & 2) */
1948 check_dst_limits_calc_pos_1 (mctx
, boundaries
, subexp_idx
,
1950 if (cpos
== -1 /* && (boundaries & 1) */)
1952 if (cpos
== 0 && (boundaries
& 2))
1955 if (subexp_idx
< BITSET_WORD_BITS
)
1956 ent
->eps_reachable_subexps_map
1957 &= ~((bitset_word_t
) 1 << subexp_idx
);
1959 while (ent
++->more
);
1963 case OP_OPEN_SUBEXP
:
1964 if ((boundaries
& 1) && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
1968 case OP_CLOSE_SUBEXP
:
1969 if ((boundaries
& 2) && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
1978 return (boundaries
& 2) ? 1 : 0;
1982 check_dst_limits_calc_pos (const re_match_context_t
*mctx
, Idx limit
,
1983 Idx subexp_idx
, Idx from_node
, Idx str_idx
,
1986 struct re_backref_cache_entry
*lim
= mctx
->bkref_ents
+ limit
;
1989 /* If we are outside the range of the subexpression, return -1 or 1. */
1990 if (str_idx
< lim
->subexp_from
)
1993 if (lim
->subexp_to
< str_idx
)
1996 /* If we are within the subexpression, return 0. */
1997 boundaries
= (str_idx
== lim
->subexp_from
);
1998 boundaries
|= (str_idx
== lim
->subexp_to
) << 1;
1999 if (boundaries
== 0)
2002 /* Else, examine epsilon closure. */
2003 return check_dst_limits_calc_pos_1 (mctx
, boundaries
, subexp_idx
,
2004 from_node
, bkref_idx
);
2007 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2008 which are against limitations from DEST_NODES. */
2010 static reg_errcode_t
2011 check_subexp_limits (const re_dfa_t
*dfa
, re_node_set
*dest_nodes
,
2012 const re_node_set
*candidates
, re_node_set
*limits
,
2013 struct re_backref_cache_entry
*bkref_ents
, Idx str_idx
)
2016 Idx node_idx
, lim_idx
;
2018 for (lim_idx
= 0; lim_idx
< limits
->nelem
; ++lim_idx
)
2021 struct re_backref_cache_entry
*ent
;
2022 ent
= bkref_ents
+ limits
->elems
[lim_idx
];
2024 if (str_idx
<= ent
->subexp_from
|| ent
->str_idx
< str_idx
)
2025 continue; /* This is unrelated limitation. */
2027 subexp_idx
= dfa
->nodes
[ent
->node
].opr
.idx
;
2028 if (ent
->subexp_to
== str_idx
)
2032 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2034 Idx node
= dest_nodes
->elems
[node_idx
];
2035 re_token_type_t type
= dfa
->nodes
[node
].type
;
2036 if (type
== OP_OPEN_SUBEXP
2037 && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2039 else if (type
== OP_CLOSE_SUBEXP
2040 && subexp_idx
== dfa
->nodes
[node
].opr
.idx
)
2044 /* Check the limitation of the open subexpression. */
2045 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2048 err
= sub_epsilon_src_nodes (dfa
, ops_node
, dest_nodes
,
2050 if (__glibc_unlikely (err
!= REG_NOERROR
))
2054 /* Check the limitation of the close subexpression. */
2056 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2058 Idx node
= dest_nodes
->elems
[node_idx
];
2059 if (!re_node_set_contains (dfa
->inveclosures
+ node
,
2061 && !re_node_set_contains (dfa
->eclosures
+ node
,
2064 /* It is against this limitation.
2065 Remove it form the current sifted state. */
2066 err
= sub_epsilon_src_nodes (dfa
, node
, dest_nodes
,
2068 if (__glibc_unlikely (err
!= REG_NOERROR
))
2074 else /* (ent->subexp_to != str_idx) */
2076 for (node_idx
= 0; node_idx
< dest_nodes
->nelem
; ++node_idx
)
2078 Idx node
= dest_nodes
->elems
[node_idx
];
2079 re_token_type_t type
= dfa
->nodes
[node
].type
;
2080 if (type
== OP_CLOSE_SUBEXP
|| type
== OP_OPEN_SUBEXP
)
2082 if (subexp_idx
!= dfa
->nodes
[node
].opr
.idx
)
2084 /* It is against this limitation.
2085 Remove it form the current sifted state. */
2086 err
= sub_epsilon_src_nodes (dfa
, node
, dest_nodes
,
2088 if (__glibc_unlikely (err
!= REG_NOERROR
))
2097 static reg_errcode_t
2098 __attribute_warn_unused_result__
2099 sift_states_bkref (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
2100 Idx str_idx
, const re_node_set
*candidates
)
2102 const re_dfa_t
*const dfa
= mctx
->dfa
;
2105 re_sift_context_t local_sctx
;
2106 Idx first_idx
= search_cur_bkref_entry (mctx
, str_idx
);
2108 if (first_idx
== -1)
2111 local_sctx
.sifted_states
= NULL
; /* Mark that it hasn't been initialized. */
2113 for (node_idx
= 0; node_idx
< candidates
->nelem
; ++node_idx
)
2116 re_token_type_t type
;
2117 struct re_backref_cache_entry
*entry
;
2118 node
= candidates
->elems
[node_idx
];
2119 type
= dfa
->nodes
[node
].type
;
2120 /* Avoid infinite loop for the REs like "()\1+". */
2121 if (node
== sctx
->last_node
&& str_idx
== sctx
->last_str_idx
)
2123 if (type
!= OP_BACK_REF
)
2126 entry
= mctx
->bkref_ents
+ first_idx
;
2127 enabled_idx
= first_idx
;
2134 re_dfastate_t
*cur_state
;
2136 if (entry
->node
!= node
)
2138 subexp_len
= entry
->subexp_to
- entry
->subexp_from
;
2139 to_idx
= str_idx
+ subexp_len
;
2140 dst_node
= (subexp_len
? dfa
->nexts
[node
]
2141 : dfa
->edests
[node
].elems
[0]);
2143 if (to_idx
> sctx
->last_str_idx
2144 || sctx
->sifted_states
[to_idx
] == NULL
2145 || !STATE_NODE_CONTAINS (sctx
->sifted_states
[to_idx
], dst_node
)
2146 || check_dst_limits (mctx
, &sctx
->limits
, node
,
2147 str_idx
, dst_node
, to_idx
))
2150 if (local_sctx
.sifted_states
== NULL
)
2153 err
= re_node_set_init_copy (&local_sctx
.limits
, &sctx
->limits
);
2154 if (__glibc_unlikely (err
!= REG_NOERROR
))
2157 local_sctx
.last_node
= node
;
2158 local_sctx
.last_str_idx
= str_idx
;
2159 ok
= re_node_set_insert (&local_sctx
.limits
, enabled_idx
);
2160 if (__glibc_unlikely (! ok
))
2165 cur_state
= local_sctx
.sifted_states
[str_idx
];
2166 err
= sift_states_backward (mctx
, &local_sctx
);
2167 if (__glibc_unlikely (err
!= REG_NOERROR
))
2169 if (sctx
->limited_states
!= NULL
)
2171 err
= merge_state_array (dfa
, sctx
->limited_states
,
2172 local_sctx
.sifted_states
,
2174 if (__glibc_unlikely (err
!= REG_NOERROR
))
2177 local_sctx
.sifted_states
[str_idx
] = cur_state
;
2178 re_node_set_remove (&local_sctx
.limits
, enabled_idx
);
2180 /* mctx->bkref_ents may have changed, reload the pointer. */
2181 entry
= mctx
->bkref_ents
+ enabled_idx
;
2183 while (enabled_idx
++, entry
++->more
);
2187 if (local_sctx
.sifted_states
!= NULL
)
2189 re_node_set_free (&local_sctx
.limits
);
2196 #ifdef RE_ENABLE_I18N
2198 sift_states_iter_mb (const re_match_context_t
*mctx
, re_sift_context_t
*sctx
,
2199 Idx node_idx
, Idx str_idx
, Idx max_str_idx
)
2201 const re_dfa_t
*const dfa
= mctx
->dfa
;
2203 /* Check the node can accept "multi byte". */
2204 naccepted
= check_node_accept_bytes (dfa
, node_idx
, &mctx
->input
, str_idx
);
2205 if (naccepted
> 0 && str_idx
+ naccepted
<= max_str_idx
&&
2206 !STATE_NODE_CONTAINS (sctx
->sifted_states
[str_idx
+ naccepted
],
2207 dfa
->nexts
[node_idx
]))
2208 /* The node can't accept the "multi byte", or the
2209 destination was already thrown away, then the node
2210 could't accept the current input "multi byte". */
2212 /* Otherwise, it is sure that the node could accept
2213 'naccepted' bytes input. */
2216 #endif /* RE_ENABLE_I18N */
2219 /* Functions for state transition. */
2221 /* Return the next state to which the current state STATE will transit by
2222 accepting the current input byte, and update STATE_LOG if necessary.
2223 If STATE can accept a multibyte char/collating element/back reference
2224 update the destination of STATE_LOG. */
2226 static re_dfastate_t
*
2227 __attribute_warn_unused_result__
2228 transit_state (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2229 re_dfastate_t
*state
)
2231 re_dfastate_t
**trtable
;
2234 #ifdef RE_ENABLE_I18N
2235 /* If the current state can accept multibyte. */
2236 if (__glibc_unlikely (state
->accept_mb
))
2238 *err
= transit_state_mb (mctx
, state
);
2239 if (__glibc_unlikely (*err
!= REG_NOERROR
))
2242 #endif /* RE_ENABLE_I18N */
2244 /* Then decide the next state with the single byte. */
2247 /* don't use transition table */
2248 return transit_state_sb (err
, mctx
, state
);
2251 /* Use transition table */
2252 ch
= re_string_fetch_byte (&mctx
->input
);
2255 trtable
= state
->trtable
;
2256 if (__glibc_likely (trtable
!= NULL
))
2259 trtable
= state
->word_trtable
;
2260 if (__glibc_likely (trtable
!= NULL
))
2262 unsigned int context
;
2264 = re_string_context_at (&mctx
->input
,
2265 re_string_cur_idx (&mctx
->input
) - 1,
2267 if (IS_WORD_CONTEXT (context
))
2268 return trtable
[ch
+ SBC_MAX
];
2273 if (!build_trtable (mctx
->dfa
, state
))
2279 /* Retry, we now have a transition table. */
2283 /* Update the state_log if we need */
2284 static re_dfastate_t
*
2285 merge_state_with_log (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2286 re_dfastate_t
*next_state
)
2288 const re_dfa_t
*const dfa
= mctx
->dfa
;
2289 Idx cur_idx
= re_string_cur_idx (&mctx
->input
);
2291 if (cur_idx
> mctx
->state_log_top
)
2293 mctx
->state_log
[cur_idx
] = next_state
;
2294 mctx
->state_log_top
= cur_idx
;
2296 else if (mctx
->state_log
[cur_idx
] == 0)
2298 mctx
->state_log
[cur_idx
] = next_state
;
2302 re_dfastate_t
*pstate
;
2303 unsigned int context
;
2304 re_node_set next_nodes
, *log_nodes
, *table_nodes
= NULL
;
2305 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2306 the destination of a multibyte char/collating element/
2307 back reference. Then the next state is the union set of
2308 these destinations and the results of the transition table. */
2309 pstate
= mctx
->state_log
[cur_idx
];
2310 log_nodes
= pstate
->entrance_nodes
;
2311 if (next_state
!= NULL
)
2313 table_nodes
= next_state
->entrance_nodes
;
2314 *err
= re_node_set_init_union (&next_nodes
, table_nodes
,
2316 if (__glibc_unlikely (*err
!= REG_NOERROR
))
2320 next_nodes
= *log_nodes
;
2321 /* Note: We already add the nodes of the initial state,
2322 then we don't need to add them here. */
2324 context
= re_string_context_at (&mctx
->input
,
2325 re_string_cur_idx (&mctx
->input
) - 1,
2327 next_state
= mctx
->state_log
[cur_idx
]
2328 = re_acquire_state_context (err
, dfa
, &next_nodes
, context
);
2329 /* We don't need to check errors here, since the return value of
2330 this function is next_state and ERR is already set. */
2332 if (table_nodes
!= NULL
)
2333 re_node_set_free (&next_nodes
);
2336 if (__glibc_unlikely (dfa
->nbackref
) && next_state
!= NULL
)
2338 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2339 later. We must check them here, since the back references in the
2340 next state might use them. */
2341 *err
= check_subexp_matching_top (mctx
, &next_state
->nodes
,
2343 if (__glibc_unlikely (*err
!= REG_NOERROR
))
2346 /* If the next state has back references. */
2347 if (next_state
->has_backref
)
2349 *err
= transit_state_bkref (mctx
, &next_state
->nodes
);
2350 if (__glibc_unlikely (*err
!= REG_NOERROR
))
2352 next_state
= mctx
->state_log
[cur_idx
];
2359 /* Skip bytes in the input that correspond to part of a
2360 multi-byte match, then look in the log for a state
2361 from which to restart matching. */
2362 static re_dfastate_t
*
2363 find_recover_state (reg_errcode_t
*err
, re_match_context_t
*mctx
)
2365 re_dfastate_t
*cur_state
;
2368 Idx max
= mctx
->state_log_top
;
2369 Idx cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2373 if (++cur_str_idx
> max
)
2375 re_string_skip_bytes (&mctx
->input
, 1);
2377 while (mctx
->state_log
[cur_str_idx
] == NULL
);
2379 cur_state
= merge_state_with_log (err
, mctx
, NULL
);
2381 while (*err
== REG_NOERROR
&& cur_state
== NULL
);
2385 /* Helper functions for transit_state. */
2387 /* From the node set CUR_NODES, pick up the nodes whose types are
2388 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2389 expression. And register them to use them later for evaluating the
2390 corresponding back references. */
2392 static reg_errcode_t
2393 check_subexp_matching_top (re_match_context_t
*mctx
, re_node_set
*cur_nodes
,
2396 const re_dfa_t
*const dfa
= mctx
->dfa
;
2400 /* TODO: This isn't efficient.
2401 Because there might be more than one nodes whose types are
2402 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2405 for (node_idx
= 0; node_idx
< cur_nodes
->nelem
; ++node_idx
)
2407 Idx node
= cur_nodes
->elems
[node_idx
];
2408 if (dfa
->nodes
[node
].type
== OP_OPEN_SUBEXP
2409 && dfa
->nodes
[node
].opr
.idx
< BITSET_WORD_BITS
2410 && (dfa
->used_bkref_map
2411 & ((bitset_word_t
) 1 << dfa
->nodes
[node
].opr
.idx
)))
2413 err
= match_ctx_add_subtop (mctx
, node
, str_idx
);
2414 if (__glibc_unlikely (err
!= REG_NOERROR
))
2422 /* Return the next state to which the current state STATE will transit by
2423 accepting the current input byte. */
2425 static re_dfastate_t
*
2426 transit_state_sb (reg_errcode_t
*err
, re_match_context_t
*mctx
,
2427 re_dfastate_t
*state
)
2429 const re_dfa_t
*const dfa
= mctx
->dfa
;
2430 re_node_set next_nodes
;
2431 re_dfastate_t
*next_state
;
2432 Idx node_cnt
, cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2433 unsigned int context
;
2435 *err
= re_node_set_alloc (&next_nodes
, state
->nodes
.nelem
+ 1);
2436 if (__glibc_unlikely (*err
!= REG_NOERROR
))
2438 for (node_cnt
= 0; node_cnt
< state
->nodes
.nelem
; ++node_cnt
)
2440 Idx cur_node
= state
->nodes
.elems
[node_cnt
];
2441 if (check_node_accept (mctx
, dfa
->nodes
+ cur_node
, cur_str_idx
))
2443 *err
= re_node_set_merge (&next_nodes
,
2444 dfa
->eclosures
+ dfa
->nexts
[cur_node
]);
2445 if (__glibc_unlikely (*err
!= REG_NOERROR
))
2447 re_node_set_free (&next_nodes
);
2452 context
= re_string_context_at (&mctx
->input
, cur_str_idx
, mctx
->eflags
);
2453 next_state
= re_acquire_state_context (err
, dfa
, &next_nodes
, context
);
2454 /* We don't need to check errors here, since the return value of
2455 this function is next_state and ERR is already set. */
2457 re_node_set_free (&next_nodes
);
2458 re_string_skip_bytes (&mctx
->input
, 1);
2463 #ifdef RE_ENABLE_I18N
2464 static reg_errcode_t
2465 transit_state_mb (re_match_context_t
*mctx
, re_dfastate_t
*pstate
)
2467 const re_dfa_t
*const dfa
= mctx
->dfa
;
2471 for (i
= 0; i
< pstate
->nodes
.nelem
; ++i
)
2473 re_node_set dest_nodes
, *new_nodes
;
2474 Idx cur_node_idx
= pstate
->nodes
.elems
[i
];
2477 unsigned int context
;
2478 re_dfastate_t
*dest_state
;
2480 if (!dfa
->nodes
[cur_node_idx
].accept_mb
)
2483 if (dfa
->nodes
[cur_node_idx
].constraint
)
2485 context
= re_string_context_at (&mctx
->input
,
2486 re_string_cur_idx (&mctx
->input
),
2488 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa
->nodes
[cur_node_idx
].constraint
,
2493 /* How many bytes the node can accept? */
2494 naccepted
= check_node_accept_bytes (dfa
, cur_node_idx
, &mctx
->input
,
2495 re_string_cur_idx (&mctx
->input
));
2499 /* The node can accepts 'naccepted' bytes. */
2500 dest_idx
= re_string_cur_idx (&mctx
->input
) + naccepted
;
2501 mctx
->max_mb_elem_len
= ((mctx
->max_mb_elem_len
< naccepted
) ? naccepted
2502 : mctx
->max_mb_elem_len
);
2503 err
= clean_state_log_if_needed (mctx
, dest_idx
);
2504 if (__glibc_unlikely (err
!= REG_NOERROR
))
2507 assert (dfa
->nexts
[cur_node_idx
] != -1);
2509 new_nodes
= dfa
->eclosures
+ dfa
->nexts
[cur_node_idx
];
2511 dest_state
= mctx
->state_log
[dest_idx
];
2512 if (dest_state
== NULL
)
2513 dest_nodes
= *new_nodes
;
2516 err
= re_node_set_init_union (&dest_nodes
,
2517 dest_state
->entrance_nodes
, new_nodes
);
2518 if (__glibc_unlikely (err
!= REG_NOERROR
))
2521 context
= re_string_context_at (&mctx
->input
, dest_idx
- 1,
2523 mctx
->state_log
[dest_idx
]
2524 = re_acquire_state_context (&err
, dfa
, &dest_nodes
, context
);
2525 if (dest_state
!= NULL
)
2526 re_node_set_free (&dest_nodes
);
2527 if (__glibc_unlikely (mctx
->state_log
[dest_idx
] == NULL
2528 && err
!= REG_NOERROR
))
2533 #endif /* RE_ENABLE_I18N */
2535 static reg_errcode_t
2536 transit_state_bkref (re_match_context_t
*mctx
, const re_node_set
*nodes
)
2538 const re_dfa_t
*const dfa
= mctx
->dfa
;
2541 Idx cur_str_idx
= re_string_cur_idx (&mctx
->input
);
2543 for (i
= 0; i
< nodes
->nelem
; ++i
)
2545 Idx dest_str_idx
, prev_nelem
, bkc_idx
;
2546 Idx node_idx
= nodes
->elems
[i
];
2547 unsigned int context
;
2548 const re_token_t
*node
= dfa
->nodes
+ node_idx
;
2549 re_node_set
*new_dest_nodes
;
2551 /* Check whether 'node' is a backreference or not. */
2552 if (node
->type
!= OP_BACK_REF
)
2555 if (node
->constraint
)
2557 context
= re_string_context_at (&mctx
->input
, cur_str_idx
,
2559 if (NOT_SATISFY_NEXT_CONSTRAINT (node
->constraint
, context
))
2563 /* 'node' is a backreference.
2564 Check the substring which the substring matched. */
2565 bkc_idx
= mctx
->nbkref_ents
;
2566 err
= get_subexp (mctx
, node_idx
, cur_str_idx
);
2567 if (__glibc_unlikely (err
!= REG_NOERROR
))
2570 /* And add the epsilon closures (which is 'new_dest_nodes') of
2571 the backreference to appropriate state_log. */
2573 assert (dfa
->nexts
[node_idx
] != -1);
2575 for (; bkc_idx
< mctx
->nbkref_ents
; ++bkc_idx
)
2578 re_dfastate_t
*dest_state
;
2579 struct re_backref_cache_entry
*bkref_ent
;
2580 bkref_ent
= mctx
->bkref_ents
+ bkc_idx
;
2581 if (bkref_ent
->node
!= node_idx
|| bkref_ent
->str_idx
!= cur_str_idx
)
2583 subexp_len
= bkref_ent
->subexp_to
- bkref_ent
->subexp_from
;
2584 new_dest_nodes
= (subexp_len
== 0
2585 ? dfa
->eclosures
+ dfa
->edests
[node_idx
].elems
[0]
2586 : dfa
->eclosures
+ dfa
->nexts
[node_idx
]);
2587 dest_str_idx
= (cur_str_idx
+ bkref_ent
->subexp_to
2588 - bkref_ent
->subexp_from
);
2589 context
= re_string_context_at (&mctx
->input
, dest_str_idx
- 1,
2591 dest_state
= mctx
->state_log
[dest_str_idx
];
2592 prev_nelem
= ((mctx
->state_log
[cur_str_idx
] == NULL
) ? 0
2593 : mctx
->state_log
[cur_str_idx
]->nodes
.nelem
);
2594 /* Add 'new_dest_node' to state_log. */
2595 if (dest_state
== NULL
)
2597 mctx
->state_log
[dest_str_idx
]
2598 = re_acquire_state_context (&err
, dfa
, new_dest_nodes
,
2600 if (__glibc_unlikely (mctx
->state_log
[dest_str_idx
] == NULL
2601 && err
!= REG_NOERROR
))
2606 re_node_set dest_nodes
;
2607 err
= re_node_set_init_union (&dest_nodes
,
2608 dest_state
->entrance_nodes
,
2610 if (__glibc_unlikely (err
!= REG_NOERROR
))
2612 re_node_set_free (&dest_nodes
);
2615 mctx
->state_log
[dest_str_idx
]
2616 = re_acquire_state_context (&err
, dfa
, &dest_nodes
, context
);
2617 re_node_set_free (&dest_nodes
);
2618 if (__glibc_unlikely (mctx
->state_log
[dest_str_idx
] == NULL
2619 && err
!= REG_NOERROR
))
2622 /* We need to check recursively if the backreference can epsilon
2625 && mctx
->state_log
[cur_str_idx
]->nodes
.nelem
> prev_nelem
)
2627 err
= check_subexp_matching_top (mctx
, new_dest_nodes
,
2629 if (__glibc_unlikely (err
!= REG_NOERROR
))
2631 err
= transit_state_bkref (mctx
, new_dest_nodes
);
2632 if (__glibc_unlikely (err
!= REG_NOERROR
))
2642 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2643 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2644 Note that we might collect inappropriate candidates here.
2645 However, the cost of checking them strictly here is too high, then we
2646 delay these checking for prune_impossible_nodes(). */
2648 static reg_errcode_t
2649 __attribute_warn_unused_result__
2650 get_subexp (re_match_context_t
*mctx
, Idx bkref_node
, Idx bkref_str_idx
)
2652 const re_dfa_t
*const dfa
= mctx
->dfa
;
2653 Idx subexp_num
, sub_top_idx
;
2654 const char *buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2655 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2656 Idx cache_idx
= search_cur_bkref_entry (mctx
, bkref_str_idx
);
2657 if (cache_idx
!= -1)
2659 const struct re_backref_cache_entry
*entry
2660 = mctx
->bkref_ents
+ cache_idx
;
2662 if (entry
->node
== bkref_node
)
2663 return REG_NOERROR
; /* We already checked it. */
2664 while (entry
++->more
);
2667 subexp_num
= dfa
->nodes
[bkref_node
].opr
.idx
;
2669 /* For each sub expression */
2670 for (sub_top_idx
= 0; sub_top_idx
< mctx
->nsub_tops
; ++sub_top_idx
)
2673 re_sub_match_top_t
*sub_top
= mctx
->sub_tops
[sub_top_idx
];
2674 re_sub_match_last_t
*sub_last
;
2675 Idx sub_last_idx
, sl_str
, bkref_str_off
;
2677 if (dfa
->nodes
[sub_top
->node
].opr
.idx
!= subexp_num
)
2678 continue; /* It isn't related. */
2680 sl_str
= sub_top
->str_idx
;
2681 bkref_str_off
= bkref_str_idx
;
2682 /* At first, check the last node of sub expressions we already
2684 for (sub_last_idx
= 0; sub_last_idx
< sub_top
->nlasts
; ++sub_last_idx
)
2686 regoff_t sl_str_diff
;
2687 sub_last
= sub_top
->lasts
[sub_last_idx
];
2688 sl_str_diff
= sub_last
->str_idx
- sl_str
;
2689 /* The matched string by the sub expression match with the substring
2690 at the back reference? */
2691 if (sl_str_diff
> 0)
2693 if (__glibc_unlikely (bkref_str_off
+ sl_str_diff
2694 > mctx
->input
.valid_len
))
2696 /* Not enough chars for a successful match. */
2697 if (bkref_str_off
+ sl_str_diff
> mctx
->input
.len
)
2700 err
= clean_state_log_if_needed (mctx
,
2703 if (__glibc_unlikely (err
!= REG_NOERROR
))
2705 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2707 if (memcmp (buf
+ bkref_str_off
, buf
+ sl_str
, sl_str_diff
) != 0)
2708 /* We don't need to search this sub expression any more. */
2711 bkref_str_off
+= sl_str_diff
;
2712 sl_str
+= sl_str_diff
;
2713 err
= get_subexp_sub (mctx
, sub_top
, sub_last
, bkref_node
,
2716 /* Reload buf, since the preceding call might have reallocated
2718 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2720 if (err
== REG_NOMATCH
)
2722 if (__glibc_unlikely (err
!= REG_NOERROR
))
2726 if (sub_last_idx
< sub_top
->nlasts
)
2728 if (sub_last_idx
> 0)
2730 /* Then, search for the other last nodes of the sub expression. */
2731 for (; sl_str
<= bkref_str_idx
; ++sl_str
)
2734 regoff_t sl_str_off
;
2735 const re_node_set
*nodes
;
2736 sl_str_off
= sl_str
- sub_top
->str_idx
;
2737 /* The matched string by the sub expression match with the substring
2738 at the back reference? */
2741 if (__glibc_unlikely (bkref_str_off
>= mctx
->input
.valid_len
))
2743 /* If we are at the end of the input, we cannot match. */
2744 if (bkref_str_off
>= mctx
->input
.len
)
2747 err
= extend_buffers (mctx
, bkref_str_off
+ 1);
2748 if (__glibc_unlikely (err
!= REG_NOERROR
))
2751 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2753 if (buf
[bkref_str_off
++] != buf
[sl_str
- 1])
2754 break; /* We don't need to search this sub expression
2757 if (mctx
->state_log
[sl_str
] == NULL
)
2759 /* Does this state have a ')' of the sub expression? */
2760 nodes
= &mctx
->state_log
[sl_str
]->nodes
;
2761 cls_node
= find_subexp_node (dfa
, nodes
, subexp_num
,
2765 if (sub_top
->path
== NULL
)
2767 sub_top
->path
= calloc (sizeof (state_array_t
),
2768 sl_str
- sub_top
->str_idx
+ 1);
2769 if (sub_top
->path
== NULL
)
2772 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2773 in the current context? */
2774 err
= check_arrival (mctx
, sub_top
->path
, sub_top
->node
,
2775 sub_top
->str_idx
, cls_node
, sl_str
,
2777 if (err
== REG_NOMATCH
)
2779 if (__glibc_unlikely (err
!= REG_NOERROR
))
2781 sub_last
= match_ctx_add_sublast (sub_top
, cls_node
, sl_str
);
2782 if (__glibc_unlikely (sub_last
== NULL
))
2784 err
= get_subexp_sub (mctx
, sub_top
, sub_last
, bkref_node
,
2786 buf
= (const char *) re_string_get_buffer (&mctx
->input
);
2787 if (err
== REG_NOMATCH
)
2789 if (__glibc_unlikely (err
!= REG_NOERROR
))
2796 /* Helper functions for get_subexp(). */
2798 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2799 If it can arrive, register the sub expression expressed with SUB_TOP
2802 static reg_errcode_t
2803 get_subexp_sub (re_match_context_t
*mctx
, const re_sub_match_top_t
*sub_top
,
2804 re_sub_match_last_t
*sub_last
, Idx bkref_node
, Idx bkref_str
)
2808 /* Can the subexpression arrive the back reference? */
2809 err
= check_arrival (mctx
, &sub_last
->path
, sub_last
->node
,
2810 sub_last
->str_idx
, bkref_node
, bkref_str
,
2812 if (err
!= REG_NOERROR
)
2814 err
= match_ctx_add_entry (mctx
, bkref_node
, bkref_str
, sub_top
->str_idx
,
2816 if (__glibc_unlikely (err
!= REG_NOERROR
))
2818 to_idx
= bkref_str
+ sub_last
->str_idx
- sub_top
->str_idx
;
2819 return clean_state_log_if_needed (mctx
, to_idx
);
2822 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2823 Search '(' if FL_OPEN, or search ')' otherwise.
2824 TODO: This function isn't efficient...
2825 Because there might be more than one nodes whose types are
2826 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2831 find_subexp_node (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
2832 Idx subexp_idx
, int type
)
2835 for (cls_idx
= 0; cls_idx
< nodes
->nelem
; ++cls_idx
)
2837 Idx cls_node
= nodes
->elems
[cls_idx
];
2838 const re_token_t
*node
= dfa
->nodes
+ cls_node
;
2839 if (node
->type
== type
2840 && node
->opr
.idx
== subexp_idx
)
2846 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2847 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2849 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2851 static reg_errcode_t
2852 __attribute_warn_unused_result__
2853 check_arrival (re_match_context_t
*mctx
, state_array_t
*path
, Idx top_node
,
2854 Idx top_str
, Idx last_node
, Idx last_str
, int type
)
2856 const re_dfa_t
*const dfa
= mctx
->dfa
;
2857 reg_errcode_t err
= REG_NOERROR
;
2858 Idx subexp_num
, backup_cur_idx
, str_idx
, null_cnt
;
2859 re_dfastate_t
*cur_state
= NULL
;
2860 re_node_set
*cur_nodes
, next_nodes
;
2861 re_dfastate_t
**backup_state_log
;
2862 unsigned int context
;
2864 subexp_num
= dfa
->nodes
[top_node
].opr
.idx
;
2865 /* Extend the buffer if we need. */
2866 if (__glibc_unlikely (path
->alloc
< last_str
+ mctx
->max_mb_elem_len
+ 1))
2868 re_dfastate_t
**new_array
;
2869 Idx old_alloc
= path
->alloc
;
2870 Idx incr_alloc
= last_str
+ mctx
->max_mb_elem_len
+ 1;
2872 if (__glibc_unlikely (IDX_MAX
- old_alloc
< incr_alloc
))
2874 new_alloc
= old_alloc
+ incr_alloc
;
2875 if (__glibc_unlikely (SIZE_MAX
/ sizeof (re_dfastate_t
*) < new_alloc
))
2877 new_array
= re_realloc (path
->array
, re_dfastate_t
*, new_alloc
);
2878 if (__glibc_unlikely (new_array
== NULL
))
2880 path
->array
= new_array
;
2881 path
->alloc
= new_alloc
;
2882 memset (new_array
+ old_alloc
, '\0',
2883 sizeof (re_dfastate_t
*) * (path
->alloc
- old_alloc
));
2886 str_idx
= path
->next_idx
? path
->next_idx
: top_str
;
2888 /* Temporary modify MCTX. */
2889 backup_state_log
= mctx
->state_log
;
2890 backup_cur_idx
= mctx
->input
.cur_idx
;
2891 mctx
->state_log
= path
->array
;
2892 mctx
->input
.cur_idx
= str_idx
;
2894 /* Setup initial node set. */
2895 context
= re_string_context_at (&mctx
->input
, str_idx
- 1, mctx
->eflags
);
2896 if (str_idx
== top_str
)
2898 err
= re_node_set_init_1 (&next_nodes
, top_node
);
2899 if (__glibc_unlikely (err
!= REG_NOERROR
))
2901 err
= check_arrival_expand_ecl (dfa
, &next_nodes
, subexp_num
, type
);
2902 if (__glibc_unlikely (err
!= REG_NOERROR
))
2904 re_node_set_free (&next_nodes
);
2910 cur_state
= mctx
->state_log
[str_idx
];
2911 if (cur_state
&& cur_state
->has_backref
)
2913 err
= re_node_set_init_copy (&next_nodes
, &cur_state
->nodes
);
2914 if (__glibc_unlikely (err
!= REG_NOERROR
))
2918 re_node_set_init_empty (&next_nodes
);
2920 if (str_idx
== top_str
|| (cur_state
&& cur_state
->has_backref
))
2922 if (next_nodes
.nelem
)
2924 err
= expand_bkref_cache (mctx
, &next_nodes
, str_idx
,
2926 if (__glibc_unlikely (err
!= REG_NOERROR
))
2928 re_node_set_free (&next_nodes
);
2932 cur_state
= re_acquire_state_context (&err
, dfa
, &next_nodes
, context
);
2933 if (__glibc_unlikely (cur_state
== NULL
&& err
!= REG_NOERROR
))
2935 re_node_set_free (&next_nodes
);
2938 mctx
->state_log
[str_idx
] = cur_state
;
2941 for (null_cnt
= 0; str_idx
< last_str
&& null_cnt
<= mctx
->max_mb_elem_len
;)
2943 re_node_set_empty (&next_nodes
);
2944 if (mctx
->state_log
[str_idx
+ 1])
2946 err
= re_node_set_merge (&next_nodes
,
2947 &mctx
->state_log
[str_idx
+ 1]->nodes
);
2948 if (__glibc_unlikely (err
!= REG_NOERROR
))
2950 re_node_set_free (&next_nodes
);
2956 err
= check_arrival_add_next_nodes (mctx
, str_idx
,
2957 &cur_state
->non_eps_nodes
,
2959 if (__glibc_unlikely (err
!= REG_NOERROR
))
2961 re_node_set_free (&next_nodes
);
2966 if (next_nodes
.nelem
)
2968 err
= check_arrival_expand_ecl (dfa
, &next_nodes
, subexp_num
, type
);
2969 if (__glibc_unlikely (err
!= REG_NOERROR
))
2971 re_node_set_free (&next_nodes
);
2974 err
= expand_bkref_cache (mctx
, &next_nodes
, str_idx
,
2976 if (__glibc_unlikely (err
!= REG_NOERROR
))
2978 re_node_set_free (&next_nodes
);
2982 context
= re_string_context_at (&mctx
->input
, str_idx
- 1, mctx
->eflags
);
2983 cur_state
= re_acquire_state_context (&err
, dfa
, &next_nodes
, context
);
2984 if (__glibc_unlikely (cur_state
== NULL
&& err
!= REG_NOERROR
))
2986 re_node_set_free (&next_nodes
);
2989 mctx
->state_log
[str_idx
] = cur_state
;
2990 null_cnt
= cur_state
== NULL
? null_cnt
+ 1 : 0;
2992 re_node_set_free (&next_nodes
);
2993 cur_nodes
= (mctx
->state_log
[last_str
] == NULL
? NULL
2994 : &mctx
->state_log
[last_str
]->nodes
);
2995 path
->next_idx
= str_idx
;
2998 mctx
->state_log
= backup_state_log
;
2999 mctx
->input
.cur_idx
= backup_cur_idx
;
3001 /* Then check the current node set has the node LAST_NODE. */
3002 if (cur_nodes
!= NULL
&& re_node_set_contains (cur_nodes
, last_node
))
3008 /* Helper functions for check_arrival. */
3010 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3012 TODO: This function is similar to the functions transit_state*(),
3013 however this function has many additional works.
3014 Can't we unify them? */
3016 static reg_errcode_t
3017 __attribute_warn_unused_result__
3018 check_arrival_add_next_nodes (re_match_context_t
*mctx
, Idx str_idx
,
3019 re_node_set
*cur_nodes
, re_node_set
*next_nodes
)
3021 const re_dfa_t
*const dfa
= mctx
->dfa
;
3024 #ifdef RE_ENABLE_I18N
3025 reg_errcode_t err
= REG_NOERROR
;
3027 re_node_set union_set
;
3028 re_node_set_init_empty (&union_set
);
3029 for (cur_idx
= 0; cur_idx
< cur_nodes
->nelem
; ++cur_idx
)
3032 Idx cur_node
= cur_nodes
->elems
[cur_idx
];
3034 re_token_type_t type
= dfa
->nodes
[cur_node
].type
;
3035 assert (!IS_EPSILON_NODE (type
));
3037 #ifdef RE_ENABLE_I18N
3038 /* If the node may accept "multi byte". */
3039 if (dfa
->nodes
[cur_node
].accept_mb
)
3041 naccepted
= check_node_accept_bytes (dfa
, cur_node
, &mctx
->input
,
3045 re_dfastate_t
*dest_state
;
3046 Idx next_node
= dfa
->nexts
[cur_node
];
3047 Idx next_idx
= str_idx
+ naccepted
;
3048 dest_state
= mctx
->state_log
[next_idx
];
3049 re_node_set_empty (&union_set
);
3052 err
= re_node_set_merge (&union_set
, &dest_state
->nodes
);
3053 if (__glibc_unlikely (err
!= REG_NOERROR
))
3055 re_node_set_free (&union_set
);
3059 ok
= re_node_set_insert (&union_set
, next_node
);
3060 if (__glibc_unlikely (! ok
))
3062 re_node_set_free (&union_set
);
3065 mctx
->state_log
[next_idx
] = re_acquire_state (&err
, dfa
,
3067 if (__glibc_unlikely (mctx
->state_log
[next_idx
] == NULL
3068 && err
!= REG_NOERROR
))
3070 re_node_set_free (&union_set
);
3075 #endif /* RE_ENABLE_I18N */
3077 || check_node_accept (mctx
, dfa
->nodes
+ cur_node
, str_idx
))
3079 ok
= re_node_set_insert (next_nodes
, dfa
->nexts
[cur_node
]);
3080 if (__glibc_unlikely (! ok
))
3082 re_node_set_free (&union_set
);
3087 re_node_set_free (&union_set
);
3091 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3092 CUR_NODES, however exclude the nodes which are:
3093 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3094 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3097 static reg_errcode_t
3098 check_arrival_expand_ecl (const re_dfa_t
*dfa
, re_node_set
*cur_nodes
,
3099 Idx ex_subexp
, int type
)
3102 Idx idx
, outside_node
;
3103 re_node_set new_nodes
;
3105 assert (cur_nodes
->nelem
);
3107 err
= re_node_set_alloc (&new_nodes
, cur_nodes
->nelem
);
3108 if (__glibc_unlikely (err
!= REG_NOERROR
))
3110 /* Create a new node set NEW_NODES with the nodes which are epsilon
3111 closures of the node in CUR_NODES. */
3113 for (idx
= 0; idx
< cur_nodes
->nelem
; ++idx
)
3115 Idx cur_node
= cur_nodes
->elems
[idx
];
3116 const re_node_set
*eclosure
= dfa
->eclosures
+ cur_node
;
3117 outside_node
= find_subexp_node (dfa
, eclosure
, ex_subexp
, type
);
3118 if (outside_node
== -1)
3120 /* There are no problematic nodes, just merge them. */
3121 err
= re_node_set_merge (&new_nodes
, eclosure
);
3122 if (__glibc_unlikely (err
!= REG_NOERROR
))
3124 re_node_set_free (&new_nodes
);
3130 /* There are problematic nodes, re-calculate incrementally. */
3131 err
= check_arrival_expand_ecl_sub (dfa
, &new_nodes
, cur_node
,
3133 if (__glibc_unlikely (err
!= REG_NOERROR
))
3135 re_node_set_free (&new_nodes
);
3140 re_node_set_free (cur_nodes
);
3141 *cur_nodes
= new_nodes
;
3145 /* Helper function for check_arrival_expand_ecl.
3146 Check incrementally the epsilon closure of TARGET, and if it isn't
3147 problematic append it to DST_NODES. */
3149 static reg_errcode_t
3150 __attribute_warn_unused_result__
3151 check_arrival_expand_ecl_sub (const re_dfa_t
*dfa
, re_node_set
*dst_nodes
,
3152 Idx target
, Idx ex_subexp
, int type
)
3155 for (cur_node
= target
; !re_node_set_contains (dst_nodes
, cur_node
);)
3159 if (dfa
->nodes
[cur_node
].type
== type
3160 && dfa
->nodes
[cur_node
].opr
.idx
== ex_subexp
)
3162 if (type
== OP_CLOSE_SUBEXP
)
3164 ok
= re_node_set_insert (dst_nodes
, cur_node
);
3165 if (__glibc_unlikely (! ok
))
3170 ok
= re_node_set_insert (dst_nodes
, cur_node
);
3171 if (__glibc_unlikely (! ok
))
3173 if (dfa
->edests
[cur_node
].nelem
== 0)
3175 if (dfa
->edests
[cur_node
].nelem
== 2)
3178 err
= check_arrival_expand_ecl_sub (dfa
, dst_nodes
,
3179 dfa
->edests
[cur_node
].elems
[1],
3181 if (__glibc_unlikely (err
!= REG_NOERROR
))
3184 cur_node
= dfa
->edests
[cur_node
].elems
[0];
3190 /* For all the back references in the current state, calculate the
3191 destination of the back references by the appropriate entry
3192 in MCTX->BKREF_ENTS. */
3194 static reg_errcode_t
3195 __attribute_warn_unused_result__
3196 expand_bkref_cache (re_match_context_t
*mctx
, re_node_set
*cur_nodes
,
3197 Idx cur_str
, Idx subexp_num
, int type
)
3199 const re_dfa_t
*const dfa
= mctx
->dfa
;
3201 Idx cache_idx_start
= search_cur_bkref_entry (mctx
, cur_str
);
3202 struct re_backref_cache_entry
*ent
;
3204 if (cache_idx_start
== -1)
3208 ent
= mctx
->bkref_ents
+ cache_idx_start
;
3211 Idx to_idx
, next_node
;
3213 /* Is this entry ENT is appropriate? */
3214 if (!re_node_set_contains (cur_nodes
, ent
->node
))
3217 to_idx
= cur_str
+ ent
->subexp_to
- ent
->subexp_from
;
3218 /* Calculate the destination of the back reference, and append it
3219 to MCTX->STATE_LOG. */
3220 if (to_idx
== cur_str
)
3222 /* The backreference did epsilon transit, we must re-check all the
3223 node in the current state. */
3224 re_node_set new_dests
;
3225 reg_errcode_t err2
, err3
;
3226 next_node
= dfa
->edests
[ent
->node
].elems
[0];
3227 if (re_node_set_contains (cur_nodes
, next_node
))
3229 err
= re_node_set_init_1 (&new_dests
, next_node
);
3230 err2
= check_arrival_expand_ecl (dfa
, &new_dests
, subexp_num
, type
);
3231 err3
= re_node_set_merge (cur_nodes
, &new_dests
);
3232 re_node_set_free (&new_dests
);
3233 if (__glibc_unlikely (err
!= REG_NOERROR
|| err2
!= REG_NOERROR
3234 || err3
!= REG_NOERROR
))
3236 err
= (err
!= REG_NOERROR
? err
3237 : (err2
!= REG_NOERROR
? err2
: err3
));
3240 /* TODO: It is still inefficient... */
3245 re_node_set union_set
;
3246 next_node
= dfa
->nexts
[ent
->node
];
3247 if (mctx
->state_log
[to_idx
])
3250 if (re_node_set_contains (&mctx
->state_log
[to_idx
]->nodes
,
3253 err
= re_node_set_init_copy (&union_set
,
3254 &mctx
->state_log
[to_idx
]->nodes
);
3255 ok
= re_node_set_insert (&union_set
, next_node
);
3256 if (__glibc_unlikely (err
!= REG_NOERROR
|| ! ok
))
3258 re_node_set_free (&union_set
);
3259 err
= err
!= REG_NOERROR
? err
: REG_ESPACE
;
3265 err
= re_node_set_init_1 (&union_set
, next_node
);
3266 if (__glibc_unlikely (err
!= REG_NOERROR
))
3269 mctx
->state_log
[to_idx
] = re_acquire_state (&err
, dfa
, &union_set
);
3270 re_node_set_free (&union_set
);
3271 if (__glibc_unlikely (mctx
->state_log
[to_idx
] == NULL
3272 && err
!= REG_NOERROR
))
3276 while (ent
++->more
);
3280 /* Build transition table for the state.
3281 Return true if successful. */
3284 build_trtable (const re_dfa_t
*dfa
, re_dfastate_t
*state
)
3289 bool need_word_trtable
= false;
3290 bitset_word_t elem
, mask
;
3291 bool dests_node_malloced
= false;
3292 bool dest_states_malloced
= false;
3293 Idx ndests
; /* Number of the destination states from 'state'. */
3294 re_dfastate_t
**trtable
;
3295 re_dfastate_t
**dest_states
= NULL
, **dest_states_word
, **dest_states_nl
;
3296 re_node_set follows
, *dests_node
;
3298 bitset_t acceptable
;
3302 re_node_set dests_node
[SBC_MAX
];
3303 bitset_t dests_ch
[SBC_MAX
];
3306 /* We build DFA states which corresponds to the destination nodes
3307 from 'state'. 'dests_node[i]' represents the nodes which i-th
3308 destination state contains, and 'dests_ch[i]' represents the
3309 characters which i-th destination state accepts. */
3310 if (__libc_use_alloca (sizeof (struct dests_alloc
)))
3311 dests_alloc
= (struct dests_alloc
*) alloca (sizeof (struct dests_alloc
));
3314 dests_alloc
= re_malloc (struct dests_alloc
, 1);
3315 if (__glibc_unlikely (dests_alloc
== NULL
))
3317 dests_node_malloced
= true;
3319 dests_node
= dests_alloc
->dests_node
;
3320 dests_ch
= dests_alloc
->dests_ch
;
3322 /* Initialize transition table. */
3323 state
->word_trtable
= state
->trtable
= NULL
;
3325 /* At first, group all nodes belonging to 'state' into several
3327 ndests
= group_nodes_into_DFAstates (dfa
, state
, dests_node
, dests_ch
);
3328 if (__glibc_unlikely (ndests
<= 0))
3330 if (dests_node_malloced
)
3331 re_free (dests_alloc
);
3332 /* Return false in case of an error, true otherwise. */
3335 state
->trtable
= (re_dfastate_t
**)
3336 calloc (sizeof (re_dfastate_t
*), SBC_MAX
);
3337 if (__glibc_unlikely (state
->trtable
== NULL
))
3344 err
= re_node_set_alloc (&follows
, ndests
+ 1);
3345 if (__glibc_unlikely (err
!= REG_NOERROR
))
3348 /* Avoid arithmetic overflow in size calculation. */
3350 = ((SIZE_MAX
- (sizeof (re_node_set
) + sizeof (bitset_t
)) * SBC_MAX
)
3351 / (3 * sizeof (re_dfastate_t
*)));
3352 if (__glibc_unlikely (ndests_max
< ndests
))
3355 if (__libc_use_alloca ((sizeof (re_node_set
) + sizeof (bitset_t
)) * SBC_MAX
3356 + ndests
* 3 * sizeof (re_dfastate_t
*)))
3357 dest_states
= (re_dfastate_t
**)
3358 alloca (ndests
* 3 * sizeof (re_dfastate_t
*));
3361 dest_states
= re_malloc (re_dfastate_t
*, ndests
* 3);
3362 if (__glibc_unlikely (dest_states
== NULL
))
3365 if (dest_states_malloced
)
3366 re_free (dest_states
);
3367 re_node_set_free (&follows
);
3368 for (i
= 0; i
< ndests
; ++i
)
3369 re_node_set_free (dests_node
+ i
);
3370 if (dests_node_malloced
)
3371 re_free (dests_alloc
);
3374 dest_states_malloced
= true;
3376 dest_states_word
= dest_states
+ ndests
;
3377 dest_states_nl
= dest_states_word
+ ndests
;
3378 bitset_empty (acceptable
);
3380 /* Then build the states for all destinations. */
3381 for (i
= 0; i
< ndests
; ++i
)
3384 re_node_set_empty (&follows
);
3385 /* Merge the follows of this destination states. */
3386 for (j
= 0; j
< dests_node
[i
].nelem
; ++j
)
3388 next_node
= dfa
->nexts
[dests_node
[i
].elems
[j
]];
3389 if (next_node
!= -1)
3391 err
= re_node_set_merge (&follows
, dfa
->eclosures
+ next_node
);
3392 if (__glibc_unlikely (err
!= REG_NOERROR
))
3396 dest_states
[i
] = re_acquire_state_context (&err
, dfa
, &follows
, 0);
3397 if (__glibc_unlikely (dest_states
[i
] == NULL
&& err
!= REG_NOERROR
))
3399 /* If the new state has context constraint,
3400 build appropriate states for these contexts. */
3401 if (dest_states
[i
]->has_constraint
)
3403 dest_states_word
[i
] = re_acquire_state_context (&err
, dfa
, &follows
,
3405 if (__glibc_unlikely (dest_states_word
[i
] == NULL
3406 && err
!= REG_NOERROR
))
3409 if (dest_states
[i
] != dest_states_word
[i
] && dfa
->mb_cur_max
> 1)
3410 need_word_trtable
= true;
3412 dest_states_nl
[i
] = re_acquire_state_context (&err
, dfa
, &follows
,
3414 if (__glibc_unlikely (dest_states_nl
[i
] == NULL
&& err
!= REG_NOERROR
))
3419 dest_states_word
[i
] = dest_states
[i
];
3420 dest_states_nl
[i
] = dest_states
[i
];
3422 bitset_merge (acceptable
, dests_ch
[i
]);
3425 if (!__glibc_unlikely (need_word_trtable
))
3427 /* We don't care about whether the following character is a word
3428 character, or we are in a single-byte character set so we can
3429 discern by looking at the character code: allocate a
3430 256-entry transition table. */
3431 trtable
= state
->trtable
=
3432 (re_dfastate_t
**) calloc (sizeof (re_dfastate_t
*), SBC_MAX
);
3433 if (__glibc_unlikely (trtable
== NULL
))
3436 /* For all characters ch...: */
3437 for (i
= 0; i
< BITSET_WORDS
; ++i
)
3438 for (ch
= i
* BITSET_WORD_BITS
, elem
= acceptable
[i
], mask
= 1;
3440 mask
<<= 1, elem
>>= 1, ++ch
)
3441 if (__glibc_unlikely (elem
& 1))
3443 /* There must be exactly one destination which accepts
3444 character ch. See group_nodes_into_DFAstates. */
3445 for (j
= 0; (dests_ch
[j
][i
] & mask
) == 0; ++j
)
3448 /* j-th destination accepts the word character ch. */
3449 if (dfa
->word_char
[i
] & mask
)
3450 trtable
[ch
] = dest_states_word
[j
];
3452 trtable
[ch
] = dest_states
[j
];
3457 /* We care about whether the following character is a word
3458 character, and we are in a multi-byte character set: discern
3459 by looking at the character code: build two 256-entry
3460 transition tables, one starting at trtable[0] and one
3461 starting at trtable[SBC_MAX]. */
3462 trtable
= state
->word_trtable
=
3463 (re_dfastate_t
**) calloc (sizeof (re_dfastate_t
*), 2 * SBC_MAX
);
3464 if (__glibc_unlikely (trtable
== NULL
))
3467 /* For all characters ch...: */
3468 for (i
= 0; i
< BITSET_WORDS
; ++i
)
3469 for (ch
= i
* BITSET_WORD_BITS
, elem
= acceptable
[i
], mask
= 1;
3471 mask
<<= 1, elem
>>= 1, ++ch
)
3472 if (__glibc_unlikely (elem
& 1))
3474 /* There must be exactly one destination which accepts
3475 character ch. See group_nodes_into_DFAstates. */
3476 for (j
= 0; (dests_ch
[j
][i
] & mask
) == 0; ++j
)
3479 /* j-th destination accepts the word character ch. */
3480 trtable
[ch
] = dest_states
[j
];
3481 trtable
[ch
+ SBC_MAX
] = dest_states_word
[j
];
3486 if (bitset_contain (acceptable
, NEWLINE_CHAR
))
3488 /* The current state accepts newline character. */
3489 for (j
= 0; j
< ndests
; ++j
)
3490 if (bitset_contain (dests_ch
[j
], NEWLINE_CHAR
))
3492 /* k-th destination accepts newline character. */
3493 trtable
[NEWLINE_CHAR
] = dest_states_nl
[j
];
3494 if (need_word_trtable
)
3495 trtable
[NEWLINE_CHAR
+ SBC_MAX
] = dest_states_nl
[j
];
3496 /* There must be only one destination which accepts
3497 newline. See group_nodes_into_DFAstates. */
3502 if (dest_states_malloced
)
3503 re_free (dest_states
);
3505 re_node_set_free (&follows
);
3506 for (i
= 0; i
< ndests
; ++i
)
3507 re_node_set_free (dests_node
+ i
);
3509 if (dests_node_malloced
)
3510 re_free (dests_alloc
);
3515 /* Group all nodes belonging to STATE into several destinations.
3516 Then for all destinations, set the nodes belonging to the destination
3517 to DESTS_NODE[i] and set the characters accepted by the destination
3518 to DEST_CH[i]. This function return the number of destinations. */
3521 group_nodes_into_DFAstates (const re_dfa_t
*dfa
, const re_dfastate_t
*state
,
3522 re_node_set
*dests_node
, bitset_t
*dests_ch
)
3527 Idx ndests
; /* Number of the destinations from 'state'. */
3528 bitset_t accepts
; /* Characters a node can accept. */
3529 const re_node_set
*cur_nodes
= &state
->nodes
;
3530 bitset_empty (accepts
);
3533 /* For all the nodes belonging to 'state', */
3534 for (i
= 0; i
< cur_nodes
->nelem
; ++i
)
3536 re_token_t
*node
= &dfa
->nodes
[cur_nodes
->elems
[i
]];
3537 re_token_type_t type
= node
->type
;
3538 unsigned int constraint
= node
->constraint
;
3540 /* Enumerate all single byte character this node can accept. */
3541 if (type
== CHARACTER
)
3542 bitset_set (accepts
, node
->opr
.c
);
3543 else if (type
== SIMPLE_BRACKET
)
3545 bitset_merge (accepts
, node
->opr
.sbcset
);
3547 else if (type
== OP_PERIOD
)
3549 #ifdef RE_ENABLE_I18N
3550 if (dfa
->mb_cur_max
> 1)
3551 bitset_merge (accepts
, dfa
->sb_char
);
3554 bitset_set_all (accepts
);
3555 if (!(dfa
->syntax
& RE_DOT_NEWLINE
))
3556 bitset_clear (accepts
, '\n');
3557 if (dfa
->syntax
& RE_DOT_NOT_NULL
)
3558 bitset_clear (accepts
, '\0');
3560 #ifdef RE_ENABLE_I18N
3561 else if (type
== OP_UTF8_PERIOD
)
3563 if (ASCII_CHARS
% BITSET_WORD_BITS
== 0)
3564 memset (accepts
, -1, ASCII_CHARS
/ CHAR_BIT
);
3566 bitset_merge (accepts
, utf8_sb_map
);
3567 if (!(dfa
->syntax
& RE_DOT_NEWLINE
))
3568 bitset_clear (accepts
, '\n');
3569 if (dfa
->syntax
& RE_DOT_NOT_NULL
)
3570 bitset_clear (accepts
, '\0');
3576 /* Check the 'accepts' and sift the characters which are not
3577 match it the context. */
3580 if (constraint
& NEXT_NEWLINE_CONSTRAINT
)
3582 bool accepts_newline
= bitset_contain (accepts
, NEWLINE_CHAR
);
3583 bitset_empty (accepts
);
3584 if (accepts_newline
)
3585 bitset_set (accepts
, NEWLINE_CHAR
);
3589 if (constraint
& NEXT_ENDBUF_CONSTRAINT
)
3591 bitset_empty (accepts
);
3595 if (constraint
& NEXT_WORD_CONSTRAINT
)
3597 bitset_word_t any_set
= 0;
3598 if (type
== CHARACTER
&& !node
->word_char
)
3600 bitset_empty (accepts
);
3603 #ifdef RE_ENABLE_I18N
3604 if (dfa
->mb_cur_max
> 1)
3605 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3606 any_set
|= (accepts
[j
] &= (dfa
->word_char
[j
] | ~dfa
->sb_char
[j
]));
3609 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3610 any_set
|= (accepts
[j
] &= dfa
->word_char
[j
]);
3614 if (constraint
& NEXT_NOTWORD_CONSTRAINT
)
3616 bitset_word_t any_set
= 0;
3617 if (type
== CHARACTER
&& node
->word_char
)
3619 bitset_empty (accepts
);
3622 #ifdef RE_ENABLE_I18N
3623 if (dfa
->mb_cur_max
> 1)
3624 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3625 any_set
|= (accepts
[j
] &= ~(dfa
->word_char
[j
] & dfa
->sb_char
[j
]));
3628 for (j
= 0; j
< BITSET_WORDS
; ++j
)
3629 any_set
|= (accepts
[j
] &= ~dfa
->word_char
[j
]);
3635 /* Then divide 'accepts' into DFA states, or create a new
3636 state. Above, we make sure that accepts is not empty. */
3637 for (j
= 0; j
< ndests
; ++j
)
3639 bitset_t intersec
; /* Intersection sets, see below. */
3641 /* Flags, see below. */
3642 bitset_word_t has_intersec
, not_subset
, not_consumed
;
3644 /* Optimization, skip if this state doesn't accept the character. */
3645 if (type
== CHARACTER
&& !bitset_contain (dests_ch
[j
], node
->opr
.c
))
3648 /* Enumerate the intersection set of this state and 'accepts'. */
3650 for (k
= 0; k
< BITSET_WORDS
; ++k
)
3651 has_intersec
|= intersec
[k
] = accepts
[k
] & dests_ch
[j
][k
];
3652 /* And skip if the intersection set is empty. */
3656 /* Then check if this state is a subset of 'accepts'. */
3657 not_subset
= not_consumed
= 0;
3658 for (k
= 0; k
< BITSET_WORDS
; ++k
)
3660 not_subset
|= remains
[k
] = ~accepts
[k
] & dests_ch
[j
][k
];
3661 not_consumed
|= accepts
[k
] = accepts
[k
] & ~dests_ch
[j
][k
];
3664 /* If this state isn't a subset of 'accepts', create a
3665 new group state, which has the 'remains'. */
3668 bitset_copy (dests_ch
[ndests
], remains
);
3669 bitset_copy (dests_ch
[j
], intersec
);
3670 err
= re_node_set_init_copy (dests_node
+ ndests
, &dests_node
[j
]);
3671 if (__glibc_unlikely (err
!= REG_NOERROR
))
3676 /* Put the position in the current group. */
3677 ok
= re_node_set_insert (&dests_node
[j
], cur_nodes
->elems
[i
]);
3678 if (__glibc_unlikely (! ok
))
3681 /* If all characters are consumed, go to next node. */
3685 /* Some characters remain, create a new group. */
3688 bitset_copy (dests_ch
[ndests
], accepts
);
3689 err
= re_node_set_init_1 (dests_node
+ ndests
, cur_nodes
->elems
[i
]);
3690 if (__glibc_unlikely (err
!= REG_NOERROR
))
3693 bitset_empty (accepts
);
3698 for (j
= 0; j
< ndests
; ++j
)
3699 re_node_set_free (dests_node
+ j
);
3703 #ifdef RE_ENABLE_I18N
3704 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3705 Return the number of the bytes the node accepts.
3706 STR_IDX is the current index of the input string.
3708 This function handles the nodes which can accept one character, or
3709 one collating element like '.', '[a-z]', opposite to the other nodes
3710 can only accept one byte. */
3713 # include <locale/weight.h>
3717 check_node_accept_bytes (const re_dfa_t
*dfa
, Idx node_idx
,
3718 const re_string_t
*input
, Idx str_idx
)
3720 const re_token_t
*node
= dfa
->nodes
+ node_idx
;
3721 int char_len
, elem_len
;
3724 if (__glibc_unlikely (node
->type
== OP_UTF8_PERIOD
))
3726 unsigned char c
= re_string_byte_at (input
, str_idx
), d
;
3727 if (__glibc_likely (c
< 0xc2))
3730 if (str_idx
+ 2 > input
->len
)
3733 d
= re_string_byte_at (input
, str_idx
+ 1);
3735 return (d
< 0x80 || d
> 0xbf) ? 0 : 2;
3739 if (c
== 0xe0 && d
< 0xa0)
3745 if (c
== 0xf0 && d
< 0x90)
3751 if (c
== 0xf8 && d
< 0x88)
3757 if (c
== 0xfc && d
< 0x84)
3763 if (str_idx
+ char_len
> input
->len
)
3766 for (i
= 1; i
< char_len
; ++i
)
3768 d
= re_string_byte_at (input
, str_idx
+ i
);
3769 if (d
< 0x80 || d
> 0xbf)
3775 char_len
= re_string_char_size_at (input
, str_idx
);
3776 if (node
->type
== OP_PERIOD
)
3780 /* FIXME: I don't think this if is needed, as both '\n'
3781 and '\0' are char_len == 1. */
3782 /* '.' accepts any one character except the following two cases. */
3783 if ((!(dfa
->syntax
& RE_DOT_NEWLINE
) &&
3784 re_string_byte_at (input
, str_idx
) == '\n') ||
3785 ((dfa
->syntax
& RE_DOT_NOT_NULL
) &&
3786 re_string_byte_at (input
, str_idx
) == '\0'))
3791 elem_len
= re_string_elem_size_at (input
, str_idx
);
3792 if ((elem_len
<= 1 && char_len
<= 1) || char_len
== 0)
3795 if (node
->type
== COMPLEX_BRACKET
)
3797 const re_charset_t
*cset
= node
->opr
.mbcset
;
3799 const unsigned char *pin
3800 = ((const unsigned char *) re_string_get_buffer (input
) + str_idx
);
3805 wchar_t wc
= ((cset
->nranges
|| cset
->nchar_classes
|| cset
->nmbchars
)
3806 ? re_string_wchar_at (input
, str_idx
) : 0);
3808 /* match with multibyte character? */
3809 for (i
= 0; i
< cset
->nmbchars
; ++i
)
3810 if (wc
== cset
->mbchars
[i
])
3812 match_len
= char_len
;
3813 goto check_node_accept_bytes_match
;
3815 /* match with character_class? */
3816 for (i
= 0; i
< cset
->nchar_classes
; ++i
)
3818 wctype_t wt
= cset
->char_classes
[i
];
3819 if (__iswctype (wc
, wt
))
3821 match_len
= char_len
;
3822 goto check_node_accept_bytes_match
;
3827 nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
3830 unsigned int in_collseq
= 0;
3831 const int32_t *table
, *indirect
;
3832 const unsigned char *weights
, *extra
;
3833 const char *collseqwc
;
3835 /* match with collating_symbol? */
3836 if (cset
->ncoll_syms
)
3837 extra
= (const unsigned char *)
3838 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
);
3839 for (i
= 0; i
< cset
->ncoll_syms
; ++i
)
3841 const unsigned char *coll_sym
= extra
+ cset
->coll_syms
[i
];
3842 /* Compare the length of input collating element and
3843 the length of current collating element. */
3844 if (*coll_sym
!= elem_len
)
3846 /* Compare each bytes. */
3847 for (j
= 0; j
< *coll_sym
; j
++)
3848 if (pin
[j
] != coll_sym
[1 + j
])
3852 /* Match if every bytes is equal. */
3854 goto check_node_accept_bytes_match
;
3860 if (elem_len
<= char_len
)
3862 collseqwc
= _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQWC
);
3863 in_collseq
= __collseq_table_lookup (collseqwc
, wc
);
3866 in_collseq
= find_collation_sequence_value (pin
, elem_len
);
3868 /* match with range expression? */
3869 /* FIXME: Implement rational ranges here, too. */
3870 for (i
= 0; i
< cset
->nranges
; ++i
)
3871 if (cset
->range_starts
[i
] <= in_collseq
3872 && in_collseq
<= cset
->range_ends
[i
])
3874 match_len
= elem_len
;
3875 goto check_node_accept_bytes_match
;
3878 /* match with equivalence_class? */
3879 if (cset
->nequiv_classes
)
3881 const unsigned char *cp
= pin
;
3882 table
= (const int32_t *)
3883 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
3884 weights
= (const unsigned char *)
3885 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTMB
);
3886 extra
= (const unsigned char *)
3887 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAMB
);
3888 indirect
= (const int32_t *)
3889 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTMB
);
3890 int32_t idx
= findidx (table
, indirect
, extra
, &cp
, elem_len
);
3891 int32_t rule
= idx
>> 24;
3895 size_t weight_len
= weights
[idx
];
3896 for (i
= 0; i
< cset
->nequiv_classes
; ++i
)
3898 int32_t equiv_class_idx
= cset
->equiv_classes
[i
];
3899 int32_t equiv_class_rule
= equiv_class_idx
>> 24;
3900 equiv_class_idx
&= 0xffffff;
3901 if (weights
[equiv_class_idx
] == weight_len
3902 && equiv_class_rule
== rule
3903 && memcmp (weights
+ idx
+ 1,
3904 weights
+ equiv_class_idx
+ 1,
3907 match_len
= elem_len
;
3908 goto check_node_accept_bytes_match
;
3917 /* match with range expression? */
3918 for (i
= 0; i
< cset
->nranges
; ++i
)
3920 if (cset
->range_starts
[i
] <= wc
&& wc
<= cset
->range_ends
[i
])
3922 match_len
= char_len
;
3923 goto check_node_accept_bytes_match
;
3927 check_node_accept_bytes_match
:
3928 if (!cset
->non_match
)
3935 return (elem_len
> char_len
) ? elem_len
: char_len
;
3943 find_collation_sequence_value (const unsigned char *mbs
, size_t mbs_len
)
3945 uint32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
3950 /* No valid character. Match it as a single byte character. */
3951 const unsigned char *collseq
= (const unsigned char *)
3952 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQMB
);
3953 return collseq
[mbs
[0]];
3960 const unsigned char *extra
= (const unsigned char *)
3961 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
);
3962 int32_t extrasize
= (const unsigned char *)
3963 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_SYMB_EXTRAMB
+ 1) - extra
;
3965 for (idx
= 0; idx
< extrasize
;)
3969 int32_t elem_mbs_len
;
3970 /* Skip the name of collating element name. */
3971 idx
= idx
+ extra
[idx
] + 1;
3972 elem_mbs_len
= extra
[idx
++];
3973 if (mbs_len
== elem_mbs_len
)
3975 for (mbs_cnt
= 0; mbs_cnt
< elem_mbs_len
; ++mbs_cnt
)
3976 if (extra
[idx
+ mbs_cnt
] != mbs
[mbs_cnt
])
3978 if (mbs_cnt
== elem_mbs_len
)
3979 /* Found the entry. */
3982 /* Skip the byte sequence of the collating element. */
3983 idx
+= elem_mbs_len
;
3984 /* Adjust for the alignment. */
3985 idx
= (idx
+ 3) & ~3;
3986 /* Skip the collation sequence value. */
3987 idx
+= sizeof (uint32_t);
3988 /* Skip the wide char sequence of the collating element. */
3989 idx
= idx
+ sizeof (uint32_t) * (*(int32_t *) (extra
+ idx
) + 1);
3990 /* If we found the entry, return the sequence value. */
3992 return *(uint32_t *) (extra
+ idx
);
3993 /* Skip the collation sequence value. */
3994 idx
+= sizeof (uint32_t);
4000 #endif /* RE_ENABLE_I18N */
4002 /* Check whether the node accepts the byte which is IDX-th
4003 byte of the INPUT. */
4006 check_node_accept (const re_match_context_t
*mctx
, const re_token_t
*node
,
4010 ch
= re_string_byte_at (&mctx
->input
, idx
);
4014 if (node
->opr
.c
!= ch
)
4018 case SIMPLE_BRACKET
:
4019 if (!bitset_contain (node
->opr
.sbcset
, ch
))
4023 #ifdef RE_ENABLE_I18N
4024 case OP_UTF8_PERIOD
:
4025 if (ch
>= ASCII_CHARS
)
4030 if ((ch
== '\n' && !(mctx
->dfa
->syntax
& RE_DOT_NEWLINE
))
4031 || (ch
== '\0' && (mctx
->dfa
->syntax
& RE_DOT_NOT_NULL
)))
4039 if (node
->constraint
)
4041 /* The node has constraints. Check whether the current context
4042 satisfies the constraints. */
4043 unsigned int context
= re_string_context_at (&mctx
->input
, idx
,
4045 if (NOT_SATISFY_NEXT_CONSTRAINT (node
->constraint
, context
))
4052 /* Extend the buffers, if the buffers have run out. */
4054 static reg_errcode_t
4055 __attribute_warn_unused_result__
4056 extend_buffers (re_match_context_t
*mctx
, int min_len
)
4059 re_string_t
*pstr
= &mctx
->input
;
4061 /* Avoid overflow. */
4062 if (__glibc_unlikely (MIN (IDX_MAX
, SIZE_MAX
/ sizeof (re_dfastate_t
*)) / 2
4066 /* Double the lengths of the buffers, but allocate at least MIN_LEN. */
4067 ret
= re_string_realloc_buffers (pstr
,
4069 MIN (pstr
->len
, pstr
->bufs_len
* 2)));
4070 if (__glibc_unlikely (ret
!= REG_NOERROR
))
4073 if (mctx
->state_log
!= NULL
)
4075 /* And double the length of state_log. */
4076 /* XXX We have no indication of the size of this buffer. If this
4077 allocation fail we have no indication that the state_log array
4078 does not have the right size. */
4079 re_dfastate_t
**new_array
= re_realloc (mctx
->state_log
, re_dfastate_t
*,
4080 pstr
->bufs_len
+ 1);
4081 if (__glibc_unlikely (new_array
== NULL
))
4083 mctx
->state_log
= new_array
;
4086 /* Then reconstruct the buffers. */
4089 #ifdef RE_ENABLE_I18N
4090 if (pstr
->mb_cur_max
> 1)
4092 ret
= build_wcs_upper_buffer (pstr
);
4093 if (__glibc_unlikely (ret
!= REG_NOERROR
))
4097 #endif /* RE_ENABLE_I18N */
4098 build_upper_buffer (pstr
);
4102 #ifdef RE_ENABLE_I18N
4103 if (pstr
->mb_cur_max
> 1)
4104 build_wcs_buffer (pstr
);
4106 #endif /* RE_ENABLE_I18N */
4108 if (pstr
->trans
!= NULL
)
4109 re_string_translate_buffer (pstr
);
4116 /* Functions for matching context. */
4118 /* Initialize MCTX. */
4120 static reg_errcode_t
4121 __attribute_warn_unused_result__
4122 match_ctx_init (re_match_context_t
*mctx
, int eflags
, Idx n
)
4124 mctx
->eflags
= eflags
;
4125 mctx
->match_last
= -1;
4128 /* Avoid overflow. */
4129 size_t max_object_size
=
4130 MAX (sizeof (struct re_backref_cache_entry
),
4131 sizeof (re_sub_match_top_t
*));
4132 if (__glibc_unlikely (MIN (IDX_MAX
, SIZE_MAX
/ max_object_size
) < n
))
4135 mctx
->bkref_ents
= re_malloc (struct re_backref_cache_entry
, n
);
4136 mctx
->sub_tops
= re_malloc (re_sub_match_top_t
*, n
);
4137 if (__glibc_unlikely (mctx
->bkref_ents
== NULL
|| mctx
->sub_tops
== NULL
))
4140 /* Already zero-ed by the caller.
4142 mctx->bkref_ents = NULL;
4143 mctx->nbkref_ents = 0;
4144 mctx->nsub_tops = 0; */
4145 mctx
->abkref_ents
= n
;
4146 mctx
->max_mb_elem_len
= 1;
4147 mctx
->asub_tops
= n
;
4151 /* Clean the entries which depend on the current input in MCTX.
4152 This function must be invoked when the matcher changes the start index
4153 of the input, or changes the input string. */
4156 match_ctx_clean (re_match_context_t
*mctx
)
4159 for (st_idx
= 0; st_idx
< mctx
->nsub_tops
; ++st_idx
)
4162 re_sub_match_top_t
*top
= mctx
->sub_tops
[st_idx
];
4163 for (sl_idx
= 0; sl_idx
< top
->nlasts
; ++sl_idx
)
4165 re_sub_match_last_t
*last
= top
->lasts
[sl_idx
];
4166 re_free (last
->path
.array
);
4169 re_free (top
->lasts
);
4172 re_free (top
->path
->array
);
4173 re_free (top
->path
);
4178 mctx
->nsub_tops
= 0;
4179 mctx
->nbkref_ents
= 0;
4182 /* Free all the memory associated with MCTX. */
4185 match_ctx_free (re_match_context_t
*mctx
)
4187 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4188 match_ctx_clean (mctx
);
4189 re_free (mctx
->sub_tops
);
4190 re_free (mctx
->bkref_ents
);
4193 /* Add a new backreference entry to MCTX.
4194 Note that we assume that caller never call this function with duplicate
4195 entry, and call with STR_IDX which isn't smaller than any existing entry.
4198 static reg_errcode_t
4199 __attribute_warn_unused_result__
4200 match_ctx_add_entry (re_match_context_t
*mctx
, Idx node
, Idx str_idx
, Idx from
,
4203 if (mctx
->nbkref_ents
>= mctx
->abkref_ents
)
4205 struct re_backref_cache_entry
* new_entry
;
4206 new_entry
= re_realloc (mctx
->bkref_ents
, struct re_backref_cache_entry
,
4207 mctx
->abkref_ents
* 2);
4208 if (__glibc_unlikely (new_entry
== NULL
))
4210 re_free (mctx
->bkref_ents
);
4213 mctx
->bkref_ents
= new_entry
;
4214 memset (mctx
->bkref_ents
+ mctx
->nbkref_ents
, '\0',
4215 sizeof (struct re_backref_cache_entry
) * mctx
->abkref_ents
);
4216 mctx
->abkref_ents
*= 2;
4218 if (mctx
->nbkref_ents
> 0
4219 && mctx
->bkref_ents
[mctx
->nbkref_ents
- 1].str_idx
== str_idx
)
4220 mctx
->bkref_ents
[mctx
->nbkref_ents
- 1].more
= 1;
4222 mctx
->bkref_ents
[mctx
->nbkref_ents
].node
= node
;
4223 mctx
->bkref_ents
[mctx
->nbkref_ents
].str_idx
= str_idx
;
4224 mctx
->bkref_ents
[mctx
->nbkref_ents
].subexp_from
= from
;
4225 mctx
->bkref_ents
[mctx
->nbkref_ents
].subexp_to
= to
;
4227 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4228 If bit N is clear, means that this entry won't epsilon-transition to
4229 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4230 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4233 A backreference does not epsilon-transition unless it is empty, so set
4234 to all zeros if FROM != TO. */
4235 mctx
->bkref_ents
[mctx
->nbkref_ents
].eps_reachable_subexps_map
4236 = (from
== to
? -1 : 0);
4238 mctx
->bkref_ents
[mctx
->nbkref_ents
++].more
= 0;
4239 if (mctx
->max_mb_elem_len
< to
- from
)
4240 mctx
->max_mb_elem_len
= to
- from
;
4244 /* Return the first entry with the same str_idx, or -1 if none is
4245 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4248 search_cur_bkref_entry (const re_match_context_t
*mctx
, Idx str_idx
)
4250 Idx left
, right
, mid
, last
;
4251 last
= right
= mctx
->nbkref_ents
;
4252 for (left
= 0; left
< right
;)
4254 mid
= (left
+ right
) / 2;
4255 if (mctx
->bkref_ents
[mid
].str_idx
< str_idx
)
4260 if (left
< last
&& mctx
->bkref_ents
[left
].str_idx
== str_idx
)
4266 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4269 static reg_errcode_t
4270 __attribute_warn_unused_result__
4271 match_ctx_add_subtop (re_match_context_t
*mctx
, Idx node
, Idx str_idx
)
4274 assert (mctx
->sub_tops
!= NULL
);
4275 assert (mctx
->asub_tops
> 0);
4277 if (__glibc_unlikely (mctx
->nsub_tops
== mctx
->asub_tops
))
4279 Idx new_asub_tops
= mctx
->asub_tops
* 2;
4280 re_sub_match_top_t
**new_array
= re_realloc (mctx
->sub_tops
,
4281 re_sub_match_top_t
*,
4283 if (__glibc_unlikely (new_array
== NULL
))
4285 mctx
->sub_tops
= new_array
;
4286 mctx
->asub_tops
= new_asub_tops
;
4288 mctx
->sub_tops
[mctx
->nsub_tops
] = calloc (1, sizeof (re_sub_match_top_t
));
4289 if (__glibc_unlikely (mctx
->sub_tops
[mctx
->nsub_tops
] == NULL
))
4291 mctx
->sub_tops
[mctx
->nsub_tops
]->node
= node
;
4292 mctx
->sub_tops
[mctx
->nsub_tops
++]->str_idx
= str_idx
;
4296 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4297 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4299 static re_sub_match_last_t
*
4300 match_ctx_add_sublast (re_sub_match_top_t
*subtop
, Idx node
, Idx str_idx
)
4302 re_sub_match_last_t
*new_entry
;
4303 if (__glibc_unlikely (subtop
->nlasts
== subtop
->alasts
))
4305 Idx new_alasts
= 2 * subtop
->alasts
+ 1;
4306 re_sub_match_last_t
**new_array
= re_realloc (subtop
->lasts
,
4307 re_sub_match_last_t
*,
4309 if (__glibc_unlikely (new_array
== NULL
))
4311 subtop
->lasts
= new_array
;
4312 subtop
->alasts
= new_alasts
;
4314 new_entry
= calloc (1, sizeof (re_sub_match_last_t
));
4315 if (__glibc_likely (new_entry
!= NULL
))
4317 subtop
->lasts
[subtop
->nlasts
] = new_entry
;
4318 new_entry
->node
= node
;
4319 new_entry
->str_idx
= str_idx
;
4326 sift_ctx_init (re_sift_context_t
*sctx
, re_dfastate_t
**sifted_sts
,
4327 re_dfastate_t
**limited_sts
, Idx last_node
, Idx last_str_idx
)
4329 sctx
->sifted_states
= sifted_sts
;
4330 sctx
->limited_states
= limited_sts
;
4331 sctx
->last_node
= last_node
;
4332 sctx
->last_str_idx
= last_str_idx
;
4333 re_node_set_init_empty (&sctx
->limits
);