Only load 4 bytes.
[glibc.git] / posix / regexec.c
blob32ba80a155185adc1fd103410b3847742bd757fe
1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22 int n) internal_function;
23 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24 static void match_ctx_free (re_match_context_t *cache) internal_function;
25 static void match_ctx_free_subtops (re_match_context_t *mctx)
26 internal_function;
27 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
28 int str_idx, int from, int to)
29 internal_function;
30 static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
31 internal_function;
32 static void match_ctx_clear_flag (re_match_context_t *mctx) internal_function;
33 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
34 int str_idx) internal_function;
35 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
36 int node, int str_idx)
37 internal_function;
38 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
39 re_dfastate_t **limited_sts, int last_node,
40 int last_str_idx, int check_subexp)
41 internal_function;
42 static reg_errcode_t re_search_internal (const regex_t *preg,
43 const char *string, int length,
44 int start, int range, int stop,
45 size_t nmatch, regmatch_t pmatch[],
46 int eflags) internal_function;
47 static int re_search_2_stub (struct re_pattern_buffer *bufp,
48 const char *string1, int length1,
49 const char *string2, int length2,
50 int start, int range, struct re_registers *regs,
51 int stop, int ret_len) internal_function;
52 static int re_search_stub (struct re_pattern_buffer *bufp,
53 const char *string, int length, int start,
54 int range, int stop, struct re_registers *regs,
55 int ret_len) internal_function;
56 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
57 int nregs, int regs_allocated) internal_function;
58 static inline re_dfastate_t *acquire_init_state_context
59 (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
60 __attribute ((always_inline)) internal_function;
61 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
62 internal_function;
63 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
64 int *p_match_first)
65 internal_function;
66 static int check_halt_node_context (const re_dfa_t *dfa, int node,
67 unsigned int context) internal_function;
68 static int check_halt_state_context (const re_match_context_t *mctx,
69 const re_dfastate_t *state, int idx)
70 internal_function;
71 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
72 regmatch_t *prev_idx_match, int cur_node,
73 int cur_idx, int nmatch) internal_function;
74 static int proceed_next_node (const re_match_context_t *mctx,
75 int nregs, regmatch_t *regs,
76 int *pidx, int node, re_node_set *eps_via_nodes,
77 struct re_fail_stack_t *fs) internal_function;
78 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
79 int str_idx, int *dests, int nregs,
80 regmatch_t *regs,
81 re_node_set *eps_via_nodes) internal_function;
82 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
83 regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
84 static reg_errcode_t set_regs (const regex_t *preg,
85 const re_match_context_t *mctx,
86 size_t nmatch, regmatch_t *pmatch,
87 int fl_backtrack) internal_function;
88 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
90 #ifdef RE_ENABLE_I18N
91 static int sift_states_iter_mb (const re_match_context_t *mctx,
92 re_sift_context_t *sctx,
93 int node_idx, int str_idx, int max_str_idx) internal_function;
94 #endif /* RE_ENABLE_I18N */
95 static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
96 re_sift_context_t *sctx) internal_function;
97 static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
98 re_sift_context_t *sctx,
99 int str_idx,
100 re_node_set *dest_nodes) internal_function;
101 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
102 re_node_set *dest_nodes,
103 const re_node_set *candidates) internal_function;
104 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
105 re_node_set *dest_nodes,
106 const re_node_set *and_nodes) internal_function;
107 static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
108 int dst_node, int dst_idx, int src_node,
109 int src_idx) internal_function;
110 static int check_dst_limits_calc_pos (re_match_context_t *mctx,
111 int limit, re_node_set *eclosures,
112 int subexp_idx, int node, int str_idx) internal_function;
113 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
114 re_node_set *dest_nodes,
115 const re_node_set *candidates,
116 re_node_set *limits,
117 struct re_backref_cache_entry *bkref_ents,
118 int str_idx) internal_function;
119 static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
120 re_sift_context_t *sctx,
121 int str_idx, re_node_set *dest_nodes) internal_function;
122 static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
123 int next_state_log_idx) internal_function;
124 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
125 re_dfastate_t **src, int num) internal_function;
126 static re_dfastate_t *transit_state (reg_errcode_t *err,
127 re_match_context_t *mctx,
128 re_dfastate_t *state) internal_function;
129 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
130 re_node_set *cur_nodes,
131 int str_idx) internal_function;
132 #if 0
133 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
134 re_match_context_t *mctx,
135 re_dfastate_t *pstate) internal_function;
136 #endif
137 #ifdef RE_ENABLE_I18N
138 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
139 re_dfastate_t *pstate) internal_function;
140 #endif /* RE_ENABLE_I18N */
141 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
142 const re_node_set *nodes) internal_function;
143 static reg_errcode_t get_subexp (re_match_context_t *mctx,
144 int bkref_node, int bkref_str_idx) internal_function;
145 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
146 const re_sub_match_top_t *sub_top,
147 re_sub_match_last_t *sub_last,
148 int bkref_node, int bkref_str) internal_function;
149 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
150 int subexp_idx, int type) internal_function;
151 static reg_errcode_t check_arrival (re_match_context_t *mctx,
152 state_array_t *path, int top_node,
153 int top_str, int last_node, int last_str,
154 int type) internal_function;
155 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
156 int str_idx,
157 re_node_set *cur_nodes,
158 re_node_set *next_nodes) internal_function;
159 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
160 re_node_set *cur_nodes,
161 int ex_subexp, int type) internal_function;
162 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
163 re_node_set *dst_nodes,
164 int target, int ex_subexp,
165 int type) internal_function;
166 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
167 re_node_set *cur_nodes, int cur_str,
168 int last_str, int subexp_num,
169 int type) internal_function;
170 static re_dfastate_t **build_trtable (re_dfa_t *dfa,
171 re_dfastate_t *state) internal_function;
172 #ifdef RE_ENABLE_I18N
173 static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
174 const re_string_t *input, int idx) internal_function;
175 # ifdef _LIBC
176 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
177 size_t name_len) internal_function;
178 # endif /* _LIBC */
179 #endif /* RE_ENABLE_I18N */
180 static int group_nodes_into_DFAstates (re_dfa_t *dfa,
181 const re_dfastate_t *state,
182 re_node_set *states_node,
183 bitset *states_ch) internal_function;
184 static int check_node_accept (const re_match_context_t *mctx,
185 const re_token_t *node, int idx) internal_function;
186 static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
188 /* Entry point for POSIX code. */
190 /* regexec searches for a given pattern, specified by PREG, in the
191 string STRING.
193 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
194 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
195 least NMATCH elements, and we set them to the offsets of the
196 corresponding matched substrings.
198 EFLAGS specifies `execution flags' which affect matching: if
199 REG_NOTBOL is set, then ^ does not match at the beginning of the
200 string; if REG_NOTEOL is set, then $ does not match at the end.
202 We return 0 if we find a match and REG_NOMATCH if not. */
205 regexec (preg, string, nmatch, pmatch, eflags)
206 const regex_t *__restrict preg;
207 const char *__restrict string;
208 size_t nmatch;
209 regmatch_t pmatch[];
210 int eflags;
212 reg_errcode_t err;
213 int length = strlen (string);
214 if (preg->no_sub)
215 err = re_search_internal (preg, string, length, 0, length, length, 0,
216 NULL, eflags);
217 else
218 err = re_search_internal (preg, string, length, 0, length, length, nmatch,
219 pmatch, eflags);
220 return err != REG_NOERROR;
222 #ifdef _LIBC
223 weak_alias (__regexec, regexec)
224 #endif
226 /* Entry points for GNU code. */
228 /* re_match, re_search, re_match_2, re_search_2
230 The former two functions operate on STRING with length LENGTH,
231 while the later two operate on concatenation of STRING1 and STRING2
232 with lengths LENGTH1 and LENGTH2, respectively.
234 re_match() matches the compiled pattern in BUFP against the string,
235 starting at index START.
237 re_search() first tries matching at index START, then it tries to match
238 starting from index START + 1, and so on. The last start position tried
239 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
240 way as re_match().)
242 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
243 the first STOP characters of the concatenation of the strings should be
244 concerned.
246 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
247 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
248 computed relative to the concatenation, not relative to the individual
249 strings.)
251 On success, re_match* functions return the length of the match, re_search*
252 return the position of the start of the match. Return value -1 means no
253 match was found and -2 indicates an internal error. */
256 re_match (bufp, string, length, start, regs)
257 struct re_pattern_buffer *bufp;
258 const char *string;
259 int length, start;
260 struct re_registers *regs;
262 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
264 #ifdef _LIBC
265 weak_alias (__re_match, re_match)
266 #endif
269 re_search (bufp, string, length, start, range, regs)
270 struct re_pattern_buffer *bufp;
271 const char *string;
272 int length, start, range;
273 struct re_registers *regs;
275 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
277 #ifdef _LIBC
278 weak_alias (__re_search, re_search)
279 #endif
282 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
283 struct re_pattern_buffer *bufp;
284 const char *string1, *string2;
285 int length1, length2, start, stop;
286 struct re_registers *regs;
288 return re_search_2_stub (bufp, string1, length1, string2, length2,
289 start, 0, regs, stop, 1);
291 #ifdef _LIBC
292 weak_alias (__re_match_2, re_match_2)
293 #endif
296 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
297 struct re_pattern_buffer *bufp;
298 const char *string1, *string2;
299 int length1, length2, start, range, stop;
300 struct re_registers *regs;
302 return re_search_2_stub (bufp, string1, length1, string2, length2,
303 start, range, regs, stop, 0);
305 #ifdef _LIBC
306 weak_alias (__re_search_2, re_search_2)
307 #endif
309 static int
310 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
311 stop, ret_len)
312 struct re_pattern_buffer *bufp;
313 const char *string1, *string2;
314 int length1, length2, start, range, stop, ret_len;
315 struct re_registers *regs;
317 const char *str;
318 int rval;
319 int len = length1 + length2;
320 int free_str = 0;
322 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
323 return -2;
325 /* Concatenate the strings. */
326 if (length2 > 0)
327 if (length1 > 0)
329 char *s = re_malloc (char, len);
331 if (BE (s == NULL, 0))
332 return -2;
333 memcpy (s, string1, length1);
334 memcpy (s + length1, string2, length2);
335 str = s;
336 free_str = 1;
338 else
339 str = string2;
340 else
341 str = string1;
343 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
344 ret_len);
345 if (free_str)
346 re_free ((char *) str);
347 return rval;
350 /* The parameters have the same meaning as those of re_search.
351 Additional parameters:
352 If RET_LEN is nonzero the length of the match is returned (re_match style);
353 otherwise the position of the match is returned. */
355 static int
356 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
357 struct re_pattern_buffer *bufp;
358 const char *string;
359 int length, start, range, stop, ret_len;
360 struct re_registers *regs;
362 reg_errcode_t result;
363 regmatch_t *pmatch;
364 int nregs, rval;
365 int eflags = 0;
367 /* Check for out-of-range. */
368 if (BE (start < 0 || start > length, 0))
369 return -1;
370 if (BE (start + range > length, 0))
371 range = length - start;
372 else if (BE (start + range < 0, 0))
373 range = -start;
375 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
376 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
378 /* Compile fastmap if we haven't yet. */
379 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
380 re_compile_fastmap (bufp);
382 if (BE (bufp->no_sub, 0))
383 regs = NULL;
385 /* We need at least 1 register. */
386 if (regs == NULL)
387 nregs = 1;
388 else if (BE (bufp->regs_allocated == REGS_FIXED &&
389 regs->num_regs < bufp->re_nsub + 1, 0))
391 nregs = regs->num_regs;
392 if (BE (nregs < 1, 0))
394 /* Nothing can be copied to regs. */
395 regs = NULL;
396 nregs = 1;
399 else
400 nregs = bufp->re_nsub + 1;
401 pmatch = re_malloc (regmatch_t, nregs);
402 if (BE (pmatch == NULL, 0))
403 return -2;
405 result = re_search_internal (bufp, string, length, start, range, stop,
406 nregs, pmatch, eflags);
408 rval = 0;
410 /* I hope we needn't fill ther regs with -1's when no match was found. */
411 if (result != REG_NOERROR)
412 rval = -1;
413 else if (regs != NULL)
415 /* If caller wants register contents data back, copy them. */
416 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
417 bufp->regs_allocated);
418 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
419 rval = -2;
422 if (BE (rval == 0, 1))
424 if (ret_len)
426 assert (pmatch[0].rm_so == start);
427 rval = pmatch[0].rm_eo - start;
429 else
430 rval = pmatch[0].rm_so;
432 re_free (pmatch);
433 return rval;
436 static unsigned
437 re_copy_regs (regs, pmatch, nregs, regs_allocated)
438 struct re_registers *regs;
439 regmatch_t *pmatch;
440 int nregs, regs_allocated;
442 int rval = REGS_REALLOCATE;
443 int i;
444 int need_regs = nregs + 1;
445 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
446 uses. */
448 /* Have the register data arrays been allocated? */
449 if (regs_allocated == REGS_UNALLOCATED)
450 { /* No. So allocate them with malloc. */
451 regs->start = re_malloc (regoff_t, need_regs);
452 regs->end = re_malloc (regoff_t, need_regs);
453 if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
454 return REGS_UNALLOCATED;
455 regs->num_regs = need_regs;
457 else if (regs_allocated == REGS_REALLOCATE)
458 { /* Yes. If we need more elements than were already
459 allocated, reallocate them. If we need fewer, just
460 leave it alone. */
461 if (BE (need_regs > regs->num_regs, 0))
463 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
464 regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
465 if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
466 return REGS_UNALLOCATED;
467 regs->start = new_start;
468 regs->end = new_end;
469 regs->num_regs = need_regs;
472 else
474 assert (regs_allocated == REGS_FIXED);
475 /* This function may not be called with REGS_FIXED and nregs too big. */
476 assert (regs->num_regs >= nregs);
477 rval = REGS_FIXED;
480 /* Copy the regs. */
481 for (i = 0; i < nregs; ++i)
483 regs->start[i] = pmatch[i].rm_so;
484 regs->end[i] = pmatch[i].rm_eo;
486 for ( ; i < regs->num_regs; ++i)
487 regs->start[i] = regs->end[i] = -1;
489 return rval;
492 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
493 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
494 this memory for recording register information. STARTS and ENDS
495 must be allocated using the malloc library routine, and must each
496 be at least NUM_REGS * sizeof (regoff_t) bytes long.
498 If NUM_REGS == 0, then subsequent matches should allocate their own
499 register data.
501 Unless this function is called, the first search or match using
502 PATTERN_BUFFER will allocate its own register data, without
503 freeing the old data. */
505 void
506 re_set_registers (bufp, regs, num_regs, starts, ends)
507 struct re_pattern_buffer *bufp;
508 struct re_registers *regs;
509 unsigned num_regs;
510 regoff_t *starts, *ends;
512 if (num_regs)
514 bufp->regs_allocated = REGS_REALLOCATE;
515 regs->num_regs = num_regs;
516 regs->start = starts;
517 regs->end = ends;
519 else
521 bufp->regs_allocated = REGS_UNALLOCATED;
522 regs->num_regs = 0;
523 regs->start = regs->end = (regoff_t *) 0;
526 #ifdef _LIBC
527 weak_alias (__re_set_registers, re_set_registers)
528 #endif
530 /* Entry points compatible with 4.2 BSD regex library. We don't define
531 them unless specifically requested. */
533 #if defined _REGEX_RE_COMP || defined _LIBC
535 # ifdef _LIBC
536 weak_function
537 # endif
538 re_exec (s)
539 const char *s;
541 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
543 #endif /* _REGEX_RE_COMP */
545 static re_node_set empty_set;
547 /* Internal entry point. */
549 /* Searches for a compiled pattern PREG in the string STRING, whose
550 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
551 mingings with regexec. START, and RANGE have the same meanings
552 with re_search.
553 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
554 otherwise return the error code.
555 Note: We assume front end functions already check ranges.
556 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
558 static reg_errcode_t
559 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
560 eflags)
561 const regex_t *preg;
562 const char *string;
563 int length, start, range, stop, eflags;
564 size_t nmatch;
565 regmatch_t pmatch[];
567 reg_errcode_t err;
568 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
569 int left_lim, right_lim, incr;
570 int fl_longest_match, match_first, match_last = -1;
571 int fast_translate, sb;
572 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
573 re_match_context_t mctx = { .dfa = dfa };
574 #else
575 re_match_context_t mctx;
576 #endif
577 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
578 && range && !preg->can_be_null) ? preg->fastmap : NULL);
580 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
581 memset (&mctx, '\0', sizeof (re_match_context_t));
582 mctx.dfa = dfa;
583 #endif
585 /* Check if the DFA haven't been compiled. */
586 if (BE (preg->used == 0 || dfa->init_state == NULL
587 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
588 || dfa->init_state_begbuf == NULL, 0))
589 return REG_NOMATCH;
591 #ifdef DEBUG
592 /* We assume front-end functions already check them. */
593 assert (start + range >= 0 && start + range <= length);
594 #endif
596 /* If initial states with non-begbuf contexts have no elements,
597 the regex must be anchored. If preg->newline_anchor is set,
598 we'll never use init_state_nl, so do not check it. */
599 if (dfa->init_state->nodes.nelem == 0
600 && dfa->init_state_word->nodes.nelem == 0
601 && (dfa->init_state_nl->nodes.nelem == 0
602 || !preg->newline_anchor))
604 if (start != 0 && start + range != 0)
605 return REG_NOMATCH;
606 start = range = 0;
609 re_node_set_init_empty (&empty_set);
611 /* We must check the longest matching, if nmatch > 0. */
612 fl_longest_match = (nmatch != 0 || dfa->nbackref);
614 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
615 preg->translate, preg->syntax & RE_ICASE, dfa);
616 if (BE (err != REG_NOERROR, 0))
617 goto free_return;
618 mctx.input.stop = stop;
619 mctx.input.raw_stop = stop;
620 mctx.input.newline_anchor = preg->newline_anchor;
622 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
623 if (BE (err != REG_NOERROR, 0))
624 goto free_return;
626 /* We will log all the DFA states through which the dfa pass,
627 if nmatch > 1, or this dfa has "multibyte node", which is a
628 back-reference or a node which can accept multibyte character or
629 multi character collating element. */
630 if (nmatch > 1 || dfa->has_mb_node)
632 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
633 if (BE (mctx.state_log == NULL, 0))
635 err = REG_ESPACE;
636 goto free_return;
639 else
640 mctx.state_log = NULL;
642 match_first = start;
643 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
644 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
646 /* Check incrementally whether of not the input string match. */
647 incr = (range < 0) ? -1 : 1;
648 left_lim = (range < 0) ? start + range : start;
649 right_lim = (range < 0) ? start : start + range;
650 sb = dfa->mb_cur_max == 1;
651 fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
653 for (;;)
655 /* At first get the current byte from input string. */
656 if (fastmap)
658 if (BE (fast_translate, 1))
660 unsigned RE_TRANSLATE_TYPE t
661 = (unsigned RE_TRANSLATE_TYPE) preg->translate;
662 if (BE (range >= 0, 1))
664 if (BE (t != NULL, 0))
666 while (BE (match_first < right_lim, 1)
667 && !fastmap[t[(unsigned char) string[match_first]]])
668 ++match_first;
670 else
672 while (BE (match_first < right_lim, 1)
673 && !fastmap[(unsigned char) string[match_first]])
674 ++match_first;
676 if (BE (match_first == right_lim, 0))
678 int ch = match_first >= length
679 ? 0 : (unsigned char) string[match_first];
680 if (!fastmap[t ? t[ch] : ch])
681 break;
684 else
686 while (match_first >= left_lim)
688 int ch = match_first >= length
689 ? 0 : (unsigned char) string[match_first];
690 if (fastmap[t ? t[ch] : ch])
691 break;
692 --match_first;
694 if (match_first < left_lim)
695 break;
698 else
700 int ch;
704 /* In this case, we can't determine easily the current byte,
705 since it might be a component byte of a multibyte
706 character. Then we use the constructed buffer
707 instead. */
708 /* If MATCH_FIRST is out of the valid range, reconstruct the
709 buffers. */
710 if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len
711 <= match_first
712 || match_first < mctx.input.raw_mbs_idx)
714 err = re_string_reconstruct (&mctx.input, match_first,
715 eflags);
716 if (BE (err != REG_NOERROR, 0))
717 goto free_return;
719 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
720 Note that MATCH_FIRST must not be smaller than 0. */
721 ch = ((match_first >= length) ? 0
722 : re_string_byte_at (&mctx.input,
723 match_first
724 - mctx.input.raw_mbs_idx));
725 if (fastmap[ch])
726 break;
727 match_first += incr;
729 while (match_first >= left_lim && match_first <= right_lim);
730 if (! fastmap[ch])
731 break;
735 /* Reconstruct the buffers so that the matcher can assume that
736 the matching starts from the beginning of the buffer. */
737 err = re_string_reconstruct (&mctx.input, match_first, eflags);
738 if (BE (err != REG_NOERROR, 0))
739 goto free_return;
740 #ifdef RE_ENABLE_I18N
741 /* Eliminate it when it is a component of a multibyte character
742 and isn't the head of a multibyte character. */
743 if (sb || re_string_first_byte (&mctx.input, 0))
744 #endif
746 /* It seems to be appropriate one, then use the matcher. */
747 /* We assume that the matching starts from 0. */
748 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
749 match_last = check_matching (&mctx, fl_longest_match,
750 range >= 0 ? &match_first : NULL);
751 if (match_last != -1)
753 if (BE (match_last == -2, 0))
755 err = REG_ESPACE;
756 goto free_return;
758 else
760 mctx.match_last = match_last;
761 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
763 re_dfastate_t *pstate = mctx.state_log[match_last];
764 mctx.last_node = check_halt_state_context (&mctx, pstate,
765 match_last);
767 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
768 || dfa->nbackref)
770 err = prune_impossible_nodes (&mctx);
771 if (err == REG_NOERROR)
772 break;
773 if (BE (err != REG_NOMATCH, 0))
774 goto free_return;
775 match_last = -1;
777 else
778 break; /* We found a match. */
781 match_ctx_clean (&mctx);
783 /* Update counter. */
784 match_first += incr;
785 if (match_first < left_lim || right_lim < match_first)
786 break;
789 /* Set pmatch[] if we need. */
790 if (match_last != -1 && nmatch > 0)
792 int reg_idx;
794 /* Initialize registers. */
795 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
796 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
798 /* Set the points where matching start/end. */
799 pmatch[0].rm_so = 0;
800 pmatch[0].rm_eo = mctx.match_last;
802 if (!preg->no_sub && nmatch > 1)
804 err = set_regs (preg, &mctx, nmatch, pmatch,
805 dfa->has_plural_match && dfa->nbackref > 0);
806 if (BE (err != REG_NOERROR, 0))
807 goto free_return;
810 /* At last, add the offset to the each registers, since we slided
811 the buffers so that we could assume that the matching starts
812 from 0. */
813 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
814 if (pmatch[reg_idx].rm_so != -1)
816 #ifdef RE_ENABLE_I18N
817 if (BE (mctx.input.offsets_needed != 0, 0))
819 if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
820 pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
821 else
822 pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
823 if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
824 pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
825 else
826 pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
828 #else
829 assert (mctx.input.offsets_needed == 0);
830 #endif
831 pmatch[reg_idx].rm_so += match_first;
832 pmatch[reg_idx].rm_eo += match_first;
835 err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
836 free_return:
837 re_free (mctx.state_log);
838 if (dfa->nbackref)
839 match_ctx_free (&mctx);
840 re_string_destruct (&mctx.input);
841 return err;
844 static reg_errcode_t
845 prune_impossible_nodes (mctx)
846 re_match_context_t *mctx;
848 re_dfa_t *const dfa = mctx->dfa;
849 int halt_node, match_last;
850 reg_errcode_t ret;
851 re_dfastate_t **sifted_states;
852 re_dfastate_t **lim_states = NULL;
853 re_sift_context_t sctx;
854 #ifdef DEBUG
855 assert (mctx->state_log != NULL);
856 #endif
857 match_last = mctx->match_last;
858 halt_node = mctx->last_node;
859 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
860 if (BE (sifted_states == NULL, 0))
862 ret = REG_ESPACE;
863 goto free_return;
865 if (dfa->nbackref)
867 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
868 if (BE (lim_states == NULL, 0))
870 ret = REG_ESPACE;
871 goto free_return;
873 while (1)
875 memset (lim_states, '\0',
876 sizeof (re_dfastate_t *) * (match_last + 1));
877 match_ctx_clear_flag (mctx);
878 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
879 match_last, 0);
880 ret = sift_states_backward (mctx, &sctx);
881 re_node_set_free (&sctx.limits);
882 if (BE (ret != REG_NOERROR, 0))
883 goto free_return;
884 if (sifted_states[0] != NULL || lim_states[0] != NULL)
885 break;
888 --match_last;
889 if (match_last < 0)
891 ret = REG_NOMATCH;
892 goto free_return;
894 } while (mctx->state_log[match_last] == NULL
895 || !mctx->state_log[match_last]->halt);
896 halt_node = check_halt_state_context (mctx,
897 mctx->state_log[match_last],
898 match_last);
900 ret = merge_state_array (dfa, sifted_states, lim_states,
901 match_last + 1);
902 re_free (lim_states);
903 lim_states = NULL;
904 if (BE (ret != REG_NOERROR, 0))
905 goto free_return;
907 else
909 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
910 match_last, 0);
911 ret = sift_states_backward (mctx, &sctx);
912 re_node_set_free (&sctx.limits);
913 if (BE (ret != REG_NOERROR, 0))
914 goto free_return;
916 re_free (mctx->state_log);
917 mctx->state_log = sifted_states;
918 sifted_states = NULL;
919 mctx->last_node = halt_node;
920 mctx->match_last = match_last;
921 ret = REG_NOERROR;
922 free_return:
923 re_free (sifted_states);
924 re_free (lim_states);
925 return ret;
928 /* Acquire an initial state and return it.
929 We must select appropriate initial state depending on the context,
930 since initial states may have constraints like "\<", "^", etc.. */
932 static inline re_dfastate_t *
933 acquire_init_state_context (err, mctx, idx)
934 reg_errcode_t *err;
935 const re_match_context_t *mctx;
936 int idx;
938 re_dfa_t *const dfa = mctx->dfa;
939 *err = REG_NOERROR;
940 if (dfa->init_state->has_constraint)
942 unsigned int context;
943 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
944 if (IS_WORD_CONTEXT (context))
945 return dfa->init_state_word;
946 else if (IS_ORDINARY_CONTEXT (context))
947 return dfa->init_state;
948 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
949 return dfa->init_state_begbuf;
950 else if (IS_NEWLINE_CONTEXT (context))
951 return dfa->init_state_nl;
952 else if (IS_BEGBUF_CONTEXT (context))
954 /* It is relatively rare case, then calculate on demand. */
955 return re_acquire_state_context (err, dfa,
956 dfa->init_state->entrance_nodes,
957 context);
959 else
960 /* Must not happen? */
961 return dfa->init_state;
963 else
964 return dfa->init_state;
967 /* Check whether the regular expression match input string INPUT or not,
968 and return the index where the matching end, return -1 if not match,
969 or return -2 in case of an error.
970 FL_LONGEST_MATCH means we want the POSIX longest matching.
971 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
972 next place where we may want to try matching.
973 Note that the matcher assume that the maching starts from the current
974 index of the buffer. */
976 static int
977 check_matching (mctx, fl_longest_match, p_match_first)
978 re_match_context_t *mctx;
979 int fl_longest_match;
980 int *p_match_first;
982 re_dfa_t *const dfa = mctx->dfa;
983 reg_errcode_t err;
984 int match = 0;
985 int match_last = -1;
986 int cur_str_idx = re_string_cur_idx (&mctx->input);
987 re_dfastate_t *cur_state;
988 int at_init_state = p_match_first != NULL, skipped = 0;
990 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
991 /* An initial state must not be NULL(invalid state). */
992 if (BE (cur_state == NULL, 0))
993 return -2;
994 if (mctx->state_log != NULL)
995 mctx->state_log[cur_str_idx] = cur_state;
997 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
998 later. E.g. Processing back references. */
999 if (BE (dfa->nbackref, 0))
1001 at_init_state = 0;
1002 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1003 if (BE (err != REG_NOERROR, 0))
1004 return err;
1006 if (cur_state->has_backref)
1008 err = transit_state_bkref (mctx, &cur_state->nodes);
1009 if (BE (err != REG_NOERROR, 0))
1010 return err;
1014 /* If the RE accepts NULL string. */
1015 if (BE (cur_state->halt, 0))
1017 if (!cur_state->has_constraint
1018 || check_halt_state_context (mctx, cur_state, cur_str_idx))
1020 if (!fl_longest_match)
1021 return cur_str_idx;
1022 else
1024 match_last = cur_str_idx;
1025 match = 1;
1030 while (!re_string_eoi (&mctx->input))
1032 re_dfastate_t *old_state = cur_state;
1033 cur_state = transit_state (&err, mctx, cur_state);
1034 if (at_init_state)
1036 if (old_state == cur_state)
1037 skipped++;
1038 else
1039 at_init_state = 0;
1042 if (cur_state == NULL) /* Reached at the invalid state or an error. */
1044 cur_str_idx = re_string_cur_idx (&mctx->input);
1045 if (BE (err != REG_NOERROR, 0))
1046 return -2;
1047 if (!fl_longest_match && match)
1048 break;
1049 else
1051 if (mctx->state_log == NULL)
1052 break;
1053 else
1055 int max = mctx->state_log_top;
1056 for (; cur_str_idx <= max; ++cur_str_idx)
1057 if (mctx->state_log[cur_str_idx] != NULL)
1058 break;
1059 if (cur_str_idx > max)
1060 break;
1065 if (cur_state != NULL && cur_state->halt)
1067 /* Reached at a halt state.
1068 Check the halt state can satisfy the current context. */
1069 if (!cur_state->has_constraint
1070 || check_halt_state_context (mctx, cur_state,
1071 re_string_cur_idx (&mctx->input)))
1073 /* We found an appropriate halt state. */
1074 match_last = re_string_cur_idx (&mctx->input);
1075 match = 1;
1076 if (!fl_longest_match)
1077 break;
1082 if (match_last == -1 && skipped)
1083 *p_match_first += skipped;
1085 return match_last;
1088 /* Check NODE match the current context. */
1090 static int check_halt_node_context (dfa, node, context)
1091 const re_dfa_t *dfa;
1092 int node;
1093 unsigned int context;
1095 re_token_type_t type = dfa->nodes[node].type;
1096 unsigned int constraint = dfa->nodes[node].constraint;
1097 if (type != END_OF_RE)
1098 return 0;
1099 if (!constraint)
1100 return 1;
1101 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1102 return 0;
1103 return 1;
1106 /* Check the halt state STATE match the current context.
1107 Return 0 if not match, if the node, STATE has, is a halt node and
1108 match the context, return the node. */
1110 static int
1111 check_halt_state_context (mctx, state, idx)
1112 const re_match_context_t *mctx;
1113 const re_dfastate_t *state;
1114 int idx;
1116 int i;
1117 unsigned int context;
1118 #ifdef DEBUG
1119 assert (state->halt);
1120 #endif
1121 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1122 for (i = 0; i < state->nodes.nelem; ++i)
1123 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1124 return state->nodes.elems[i];
1125 return 0;
1128 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1129 corresponding to the DFA).
1130 Return the destination node, and update EPS_VIA_NODES, return -1 in case
1131 of errors. */
1133 static int
1134 proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
1135 const re_match_context_t *mctx;
1136 regmatch_t *regs;
1137 int nregs, *pidx, node;
1138 re_node_set *eps_via_nodes;
1139 struct re_fail_stack_t *fs;
1141 re_dfa_t *const dfa = mctx->dfa;
1142 int i, err, dest_node;
1143 dest_node = -1;
1144 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1146 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1147 int ndest, dest_nodes[2];
1148 err = re_node_set_insert (eps_via_nodes, node);
1149 if (BE (err < 0, 0))
1150 return -2;
1151 /* Pick up valid destinations. */
1152 for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1154 int candidate = dfa->edests[node].elems[i];
1155 if (!re_node_set_contains (cur_nodes, candidate))
1156 continue;
1157 dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1158 dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1159 ++ndest;
1161 if (ndest <= 1)
1162 return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1163 /* In order to avoid infinite loop like "(a*)*". */
1164 if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1165 return dest_nodes[1];
1166 if (fs != NULL
1167 && push_fail_stack (fs, *pidx, dest_nodes, nregs, regs,
1168 eps_via_nodes))
1169 return -2;
1170 return dest_nodes[0];
1172 else
1174 int naccepted = 0;
1175 re_token_type_t type = dfa->nodes[node].type;
1177 #ifdef RE_ENABLE_I18N
1178 if (ACCEPT_MB_NODE (type))
1179 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1180 else
1181 #endif /* RE_ENABLE_I18N */
1182 if (type == OP_BACK_REF)
1184 int subexp_idx = dfa->nodes[node].opr.idx;
1185 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1186 if (fs != NULL)
1188 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1189 return -1;
1190 else if (naccepted)
1192 char *buf = (char *) re_string_get_buffer (&mctx->input);
1193 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1194 naccepted) != 0)
1195 return -1;
1199 if (naccepted == 0)
1201 err = re_node_set_insert (eps_via_nodes, node);
1202 if (BE (err < 0, 0))
1203 return -2;
1204 dest_node = dfa->edests[node].elems[0];
1205 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1206 dest_node))
1207 return dest_node;
1211 if (naccepted != 0
1212 || check_node_accept (mctx, dfa->nodes + node, *pidx))
1214 dest_node = dfa->nexts[node];
1215 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1216 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1217 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1218 dest_node)))
1219 return -1;
1220 re_node_set_empty (eps_via_nodes);
1221 return dest_node;
1224 return -1;
1227 static reg_errcode_t
1228 push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1229 struct re_fail_stack_t *fs;
1230 int str_idx, *dests, nregs;
1231 regmatch_t *regs;
1232 re_node_set *eps_via_nodes;
1234 reg_errcode_t err;
1235 int num = fs->num++;
1236 if (fs->num == fs->alloc)
1238 struct re_fail_stack_ent_t *new_array;
1239 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1240 * fs->alloc * 2));
1241 if (new_array == NULL)
1242 return REG_ESPACE;
1243 fs->alloc *= 2;
1244 fs->stack = new_array;
1246 fs->stack[num].idx = str_idx;
1247 fs->stack[num].node = dests[1];
1248 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1249 if (fs->stack[num].regs == NULL)
1250 return REG_ESPACE;
1251 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1252 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1253 return err;
1256 static int
1257 pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1258 struct re_fail_stack_t *fs;
1259 int *pidx, nregs;
1260 regmatch_t *regs;
1261 re_node_set *eps_via_nodes;
1263 int num = --fs->num;
1264 assert (num >= 0);
1265 *pidx = fs->stack[num].idx;
1266 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1267 re_node_set_free (eps_via_nodes);
1268 re_free (fs->stack[num].regs);
1269 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1270 return fs->stack[num].node;
1273 /* Set the positions where the subexpressions are starts/ends to registers
1274 PMATCH.
1275 Note: We assume that pmatch[0] is already set, and
1276 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1278 static reg_errcode_t
1279 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1280 const regex_t *preg;
1281 const re_match_context_t *mctx;
1282 size_t nmatch;
1283 regmatch_t *pmatch;
1284 int fl_backtrack;
1286 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1287 int idx, cur_node, real_nmatch;
1288 re_node_set eps_via_nodes;
1289 struct re_fail_stack_t *fs;
1290 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1291 regmatch_t *prev_idx_match;
1293 #ifdef DEBUG
1294 assert (nmatch > 1);
1295 assert (mctx->state_log != NULL);
1296 #endif
1297 if (fl_backtrack)
1299 fs = &fs_body;
1300 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1301 if (fs->stack == NULL)
1302 return REG_ESPACE;
1304 else
1305 fs = NULL;
1307 cur_node = dfa->init_node;
1308 real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1309 re_node_set_init_empty (&eps_via_nodes);
1311 prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
1312 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
1314 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1316 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
1318 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1320 int reg_idx;
1321 if (fs)
1323 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1324 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1325 break;
1326 if (reg_idx == nmatch)
1328 re_node_set_free (&eps_via_nodes);
1329 return free_fail_stack_return (fs);
1331 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1332 &eps_via_nodes);
1334 else
1336 re_node_set_free (&eps_via_nodes);
1337 return REG_NOERROR;
1341 /* Proceed to next node. */
1342 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1343 &eps_via_nodes, fs);
1345 if (BE (cur_node < 0, 0))
1347 if (BE (cur_node == -2, 0))
1349 re_node_set_free (&eps_via_nodes);
1350 free_fail_stack_return (fs);
1351 return REG_ESPACE;
1353 if (fs)
1354 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1355 &eps_via_nodes);
1356 else
1358 re_node_set_free (&eps_via_nodes);
1359 return REG_NOMATCH;
1363 re_node_set_free (&eps_via_nodes);
1364 return free_fail_stack_return (fs);
1367 static reg_errcode_t
1368 free_fail_stack_return (fs)
1369 struct re_fail_stack_t *fs;
1371 if (fs)
1373 int fs_idx;
1374 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1376 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1377 re_free (fs->stack[fs_idx].regs);
1379 re_free (fs->stack);
1381 return REG_NOERROR;
1384 static void
1385 update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
1386 re_dfa_t *dfa;
1387 regmatch_t *pmatch, *prev_idx_match;
1388 int cur_node, cur_idx, nmatch;
1390 int type = dfa->nodes[cur_node].type;
1391 if (type == OP_OPEN_SUBEXP)
1393 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1395 /* We are at the first node of this sub expression. */
1396 if (reg_num < nmatch)
1398 pmatch[reg_num].rm_so = cur_idx;
1399 pmatch[reg_num].rm_eo = -1;
1402 else if (type == OP_CLOSE_SUBEXP)
1404 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1405 if (reg_num < nmatch)
1407 /* We are at the last node of this sub expression. */
1408 if (pmatch[reg_num].rm_so < cur_idx)
1410 pmatch[reg_num].rm_eo = cur_idx;
1411 /* This is a non-empty match or we are not inside an optional
1412 subexpression. Accept this right away. */
1413 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1415 else
1417 if (dfa->nodes[cur_node].opt_subexp
1418 && prev_idx_match[reg_num].rm_so != -1)
1419 /* We transited through an empty match for an optional
1420 subexpression, like (a?)*, and this is not the subexp's
1421 first match. Copy back the old content of the registers
1422 so that matches of an inner subexpression are undone as
1423 well, like in ((a?))*. */
1424 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1425 else
1426 /* We completed a subexpression, but it may be part of
1427 an optional one, so do not update PREV_IDX_MATCH. */
1428 pmatch[reg_num].rm_eo = cur_idx;
1434 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1435 and sift the nodes in each states according to the following rules.
1436 Updated state_log will be wrote to STATE_LOG.
1438 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1439 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1440 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1441 the LAST_NODE, we throw away the node `a'.
1442 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1443 string `s' and transit to `b':
1444 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1445 away the node `a'.
1446 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1447 thrown away, we throw away the node `a'.
1448 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1449 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1450 node `a'.
1451 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1452 we throw away the node `a'. */
1454 #define STATE_NODE_CONTAINS(state,node) \
1455 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1457 static reg_errcode_t
1458 sift_states_backward (mctx, sctx)
1459 re_match_context_t *mctx;
1460 re_sift_context_t *sctx;
1462 re_dfa_t *const dfa = mctx->dfa;
1463 reg_errcode_t err;
1464 int null_cnt = 0;
1465 int str_idx = sctx->last_str_idx;
1466 re_node_set cur_dest;
1467 re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
1469 #ifdef DEBUG
1470 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1471 #endif
1472 cur_src = &mctx->state_log[str_idx]->nodes;
1474 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1475 transit to the last_node and the last_node itself. */
1476 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1477 if (BE (err != REG_NOERROR, 0))
1478 return err;
1479 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1480 if (BE (err != REG_NOERROR, 0))
1481 goto free_return;
1483 /* Then check each states in the state_log. */
1484 while (str_idx > 0)
1486 int i, ret;
1487 /* Update counters. */
1488 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1489 if (null_cnt > mctx->max_mb_elem_len)
1491 memset (sctx->sifted_states, '\0',
1492 sizeof (re_dfastate_t *) * str_idx);
1493 re_node_set_free (&cur_dest);
1494 return REG_NOERROR;
1496 re_node_set_empty (&cur_dest);
1497 --str_idx;
1498 cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1499 : &mctx->state_log[str_idx]->nodes);
1501 /* Then build the next sifted state.
1502 We build the next sifted state on `cur_dest', and update
1503 `sifted_states[str_idx]' with `cur_dest'.
1504 Note:
1505 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1506 `cur_src' points the node_set of the old `state_log[str_idx]'. */
1507 for (i = 0; i < cur_src->nelem; i++)
1509 int prev_node = cur_src->elems[i];
1510 int naccepted = 0;
1511 re_token_type_t type = dfa->nodes[prev_node].type;
1513 if (IS_EPSILON_NODE (type))
1514 continue;
1515 #ifdef RE_ENABLE_I18N
1516 /* If the node may accept `multi byte'. */
1517 if (ACCEPT_MB_NODE (type))
1518 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1519 str_idx, sctx->last_str_idx);
1521 #endif /* RE_ENABLE_I18N */
1522 /* We don't check backreferences here.
1523 See update_cur_sifted_state(). */
1525 if (!naccepted
1526 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1527 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1528 dfa->nexts[prev_node]))
1529 naccepted = 1;
1531 if (naccepted == 0)
1532 continue;
1534 if (sctx->limits.nelem)
1536 int to_idx = str_idx + naccepted;
1537 if (check_dst_limits (mctx, &sctx->limits,
1538 dfa->nexts[prev_node], to_idx,
1539 prev_node, str_idx))
1540 continue;
1542 ret = re_node_set_insert (&cur_dest, prev_node);
1543 if (BE (ret == -1, 0))
1545 err = REG_ESPACE;
1546 goto free_return;
1550 /* Add all the nodes which satisfy the following conditions:
1551 - It can epsilon transit to a node in CUR_DEST.
1552 - It is in CUR_SRC.
1553 And update state_log. */
1554 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1555 if (BE (err != REG_NOERROR, 0))
1556 goto free_return;
1558 err = REG_NOERROR;
1559 free_return:
1560 re_node_set_free (&cur_dest);
1561 return err;
1564 /* Helper functions. */
1566 static reg_errcode_t
1567 clean_state_log_if_needed (mctx, next_state_log_idx)
1568 re_match_context_t *mctx;
1569 int next_state_log_idx;
1571 int top = mctx->state_log_top;
1573 if (next_state_log_idx >= mctx->input.bufs_len
1574 || (next_state_log_idx >= mctx->input.valid_len
1575 && mctx->input.valid_len < mctx->input.len))
1577 reg_errcode_t err;
1578 err = extend_buffers (mctx);
1579 if (BE (err != REG_NOERROR, 0))
1580 return err;
1583 if (top < next_state_log_idx)
1585 memset (mctx->state_log + top + 1, '\0',
1586 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1587 mctx->state_log_top = next_state_log_idx;
1589 return REG_NOERROR;
1592 static reg_errcode_t
1593 merge_state_array (dfa, dst, src, num)
1594 re_dfa_t *dfa;
1595 re_dfastate_t **dst;
1596 re_dfastate_t **src;
1597 int num;
1599 int st_idx;
1600 reg_errcode_t err;
1601 for (st_idx = 0; st_idx < num; ++st_idx)
1603 if (dst[st_idx] == NULL)
1604 dst[st_idx] = src[st_idx];
1605 else if (src[st_idx] != NULL)
1607 re_node_set merged_set;
1608 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1609 &src[st_idx]->nodes);
1610 if (BE (err != REG_NOERROR, 0))
1611 return err;
1612 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1613 re_node_set_free (&merged_set);
1614 if (BE (err != REG_NOERROR, 0))
1615 return err;
1618 return REG_NOERROR;
1621 static reg_errcode_t
1622 update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
1623 re_match_context_t *mctx;
1624 re_sift_context_t *sctx;
1625 int str_idx;
1626 re_node_set *dest_nodes;
1628 re_dfa_t *const dfa = mctx->dfa;
1629 reg_errcode_t err;
1630 const re_node_set *candidates;
1631 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1632 : &mctx->state_log[str_idx]->nodes);
1634 /* At first, add the nodes which can epsilon transit to a node in
1635 DEST_NODE. */
1636 if (dest_nodes->nelem)
1638 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1639 if (BE (err != REG_NOERROR, 0))
1640 return err;
1643 /* Then, check the limitations in the current sift_context. */
1644 if (dest_nodes->nelem && sctx->limits.nelem)
1646 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1647 mctx->bkref_ents, str_idx);
1648 if (BE (err != REG_NOERROR, 0))
1649 return err;
1652 /* Update state_log. */
1653 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1654 if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1655 return err;
1657 if ((mctx->state_log[str_idx] != NULL
1658 && mctx->state_log[str_idx]->has_backref))
1660 err = sift_states_bkref (mctx, sctx, str_idx, dest_nodes);
1661 if (BE (err != REG_NOERROR, 0))
1662 return err;
1664 return REG_NOERROR;
1667 static reg_errcode_t
1668 add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1669 re_dfa_t *dfa;
1670 re_node_set *dest_nodes;
1671 const re_node_set *candidates;
1673 reg_errcode_t err;
1674 int src_idx;
1675 re_node_set src_copy;
1677 err = re_node_set_init_copy (&src_copy, dest_nodes);
1678 if (BE (err != REG_NOERROR, 0))
1679 return err;
1680 for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1682 err = re_node_set_add_intersect (dest_nodes, candidates,
1683 dfa->inveclosures
1684 + src_copy.elems[src_idx]);
1685 if (BE (err != REG_NOERROR, 0))
1687 re_node_set_free (&src_copy);
1688 return err;
1691 re_node_set_free (&src_copy);
1692 return REG_NOERROR;
1695 static reg_errcode_t
1696 sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1697 re_dfa_t *dfa;
1698 int node;
1699 re_node_set *dest_nodes;
1700 const re_node_set *candidates;
1702 int ecl_idx;
1703 reg_errcode_t err;
1704 re_node_set *inv_eclosure = dfa->inveclosures + node;
1705 re_node_set except_nodes;
1706 re_node_set_init_empty (&except_nodes);
1707 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1709 int cur_node = inv_eclosure->elems[ecl_idx];
1710 if (cur_node == node)
1711 continue;
1712 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1714 int edst1 = dfa->edests[cur_node].elems[0];
1715 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1716 ? dfa->edests[cur_node].elems[1] : -1);
1717 if ((!re_node_set_contains (inv_eclosure, edst1)
1718 && re_node_set_contains (dest_nodes, edst1))
1719 || (edst2 > 0
1720 && !re_node_set_contains (inv_eclosure, edst2)
1721 && re_node_set_contains (dest_nodes, edst2)))
1723 err = re_node_set_add_intersect (&except_nodes, candidates,
1724 dfa->inveclosures + cur_node);
1725 if (BE (err != REG_NOERROR, 0))
1727 re_node_set_free (&except_nodes);
1728 return err;
1733 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1735 int cur_node = inv_eclosure->elems[ecl_idx];
1736 if (!re_node_set_contains (&except_nodes, cur_node))
1738 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1739 re_node_set_remove_at (dest_nodes, idx);
1742 re_node_set_free (&except_nodes);
1743 return REG_NOERROR;
1746 static int
1747 check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
1748 re_match_context_t *mctx;
1749 re_node_set *limits;
1750 int dst_node, dst_idx, src_node, src_idx;
1752 re_dfa_t *const dfa = mctx->dfa;
1753 int lim_idx, src_pos, dst_pos;
1755 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1757 int subexp_idx;
1758 struct re_backref_cache_entry *ent;
1759 ent = mctx->bkref_ents + limits->elems[lim_idx];
1760 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1762 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1763 dfa->eclosures + dst_node,
1764 subexp_idx, dst_node, dst_idx);
1765 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1766 dfa->eclosures + src_node,
1767 subexp_idx, src_node, src_idx);
1769 /* In case of:
1770 <src> <dst> ( <subexp> )
1771 ( <subexp> ) <src> <dst>
1772 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1773 if (src_pos == dst_pos)
1774 continue; /* This is unrelated limitation. */
1775 else
1776 return 1;
1778 return 0;
1781 static int
1782 check_dst_limits_calc_pos (mctx, limit, eclosures, subexp_idx, from_node,
1783 str_idx)
1784 re_match_context_t *mctx;
1785 re_node_set *eclosures;
1786 int limit, subexp_idx, from_node, str_idx;
1788 re_dfa_t *const dfa = mctx->dfa;
1789 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1790 int node_idx;
1792 /* If we are outside the range of the subexpression, return -1 or 1. */
1793 if (str_idx < lim->subexp_from)
1794 return -1;
1796 if (lim->subexp_to < str_idx)
1797 return 1;
1799 /* If we are within the subexpression, return 0. */
1800 if (str_idx != lim->subexp_from && str_idx != lim->subexp_to)
1801 return 0;
1803 /* Else, we are on the boundary: examine the nodes on the epsilon
1804 closure. */
1805 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1807 int node = eclosures->elems[node_idx];
1808 switch (dfa->nodes[node].type)
1810 case OP_BACK_REF:
1812 int bi = search_cur_bkref_entry (mctx, str_idx);
1813 for (; bi < mctx->nbkref_ents; ++bi)
1815 struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1816 int dst, cpos;
1818 /* If this backreference goes beyond the point we're
1819 examining, don't go any further. */
1820 if (ent->str_idx > str_idx)
1821 break;
1823 if (ent->node != node || ent->subexp_from != ent->subexp_to)
1824 continue;
1826 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1827 OP_CLOSE_SUBEXP cases below. But, if the
1828 destination node is the same node as the source
1829 node, don't recurse because it would cause an
1830 infinite loop: a regex that exhibits this behavior
1831 is ()\1*\1* */
1832 dst = dfa->edests[node].elems[0];
1833 if (dst == from_node)
1835 if (str_idx == lim->subexp_from)
1836 return -1;
1837 else /* if (str_idx == lim->subexp_to) */
1838 return 0;
1841 cpos = check_dst_limits_calc_pos (mctx, limit,
1842 dfa->eclosures + dst,
1843 subexp_idx, dst,
1844 str_idx);
1846 if (cpos == -1 && str_idx == lim->subexp_from)
1847 return -1;
1849 if (cpos == 0 /* && str_idx == lim->lim->subexp_to */)
1850 return 0;
1852 break;
1855 case OP_OPEN_SUBEXP:
1856 if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx)
1857 return -1;
1858 break;
1860 case OP_CLOSE_SUBEXP:
1861 if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx)
1862 return 0;
1863 break;
1865 default:
1866 break;
1870 if (str_idx == lim->subexp_to)
1871 return 1;
1872 else
1873 return 0;
1876 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1877 which are against limitations from DEST_NODES. */
1879 static reg_errcode_t
1880 check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1881 re_dfa_t *dfa;
1882 re_node_set *dest_nodes;
1883 const re_node_set *candidates;
1884 re_node_set *limits;
1885 struct re_backref_cache_entry *bkref_ents;
1886 int str_idx;
1888 reg_errcode_t err;
1889 int node_idx, lim_idx;
1891 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1893 int subexp_idx;
1894 struct re_backref_cache_entry *ent;
1895 ent = bkref_ents + limits->elems[lim_idx];
1897 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1898 continue; /* This is unrelated limitation. */
1900 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1901 if (ent->subexp_to == str_idx)
1903 int ops_node = -1;
1904 int cls_node = -1;
1905 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1907 int node = dest_nodes->elems[node_idx];
1908 re_token_type_t type = dfa->nodes[node].type;
1909 if (type == OP_OPEN_SUBEXP
1910 && subexp_idx == dfa->nodes[node].opr.idx)
1911 ops_node = node;
1912 else if (type == OP_CLOSE_SUBEXP
1913 && subexp_idx == dfa->nodes[node].opr.idx)
1914 cls_node = node;
1917 /* Check the limitation of the open subexpression. */
1918 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
1919 if (ops_node >= 0)
1921 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
1922 candidates);
1923 if (BE (err != REG_NOERROR, 0))
1924 return err;
1927 /* Check the limitation of the close subexpression. */
1928 if (cls_node >= 0)
1929 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1931 int node = dest_nodes->elems[node_idx];
1932 if (!re_node_set_contains (dfa->inveclosures + node,
1933 cls_node)
1934 && !re_node_set_contains (dfa->eclosures + node,
1935 cls_node))
1937 /* It is against this limitation.
1938 Remove it form the current sifted state. */
1939 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
1940 candidates);
1941 if (BE (err != REG_NOERROR, 0))
1942 return err;
1943 --node_idx;
1947 else /* (ent->subexp_to != str_idx) */
1949 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1951 int node = dest_nodes->elems[node_idx];
1952 re_token_type_t type = dfa->nodes[node].type;
1953 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1955 if (subexp_idx != dfa->nodes[node].opr.idx)
1956 continue;
1957 if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1958 || (type == OP_OPEN_SUBEXP))
1960 /* It is against this limitation.
1961 Remove it form the current sifted state. */
1962 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
1963 candidates);
1964 if (BE (err != REG_NOERROR, 0))
1965 return err;
1971 return REG_NOERROR;
1974 static reg_errcode_t
1975 sift_states_bkref (mctx, sctx, str_idx, dest_nodes)
1976 re_match_context_t *mctx;
1977 re_sift_context_t *sctx;
1978 int str_idx;
1979 re_node_set *dest_nodes;
1981 re_dfa_t *const dfa = mctx->dfa;
1982 reg_errcode_t err;
1983 int node_idx, node;
1984 re_sift_context_t local_sctx;
1985 const re_node_set *candidates;
1986 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1987 : &mctx->state_log[str_idx]->nodes);
1988 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
1990 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1992 int cur_bkref_idx = re_string_cur_idx (&mctx->input);
1993 re_token_type_t type;
1994 node = candidates->elems[node_idx];
1995 type = dfa->nodes[node].type;
1996 if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1997 continue;
1998 /* Avoid infinite loop for the REs like "()\1+". */
1999 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2000 continue;
2001 if (type == OP_BACK_REF)
2003 int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
2004 for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2006 int disabled_idx, subexp_len, to_idx, dst_node;
2007 struct re_backref_cache_entry *entry;
2008 entry = mctx->bkref_ents + enabled_idx;
2009 if (entry->str_idx > str_idx)
2010 break;
2011 if (entry->node != node)
2012 continue;
2013 subexp_len = entry->subexp_to - entry->subexp_from;
2014 to_idx = str_idx + subexp_len;
2015 dst_node = (subexp_len ? dfa->nexts[node]
2016 : dfa->edests[node].elems[0]);
2018 if (to_idx > sctx->last_str_idx
2019 || sctx->sifted_states[to_idx] == NULL
2020 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
2021 dst_node)
2022 || check_dst_limits (mctx, &sctx->limits, node,
2023 str_idx, dst_node, to_idx))
2024 continue;
2026 re_dfastate_t *cur_state;
2027 entry->flag = 0;
2028 for (disabled_idx = enabled_idx + 1;
2029 disabled_idx < mctx->nbkref_ents; ++disabled_idx)
2031 struct re_backref_cache_entry *entry2;
2032 entry2 = mctx->bkref_ents + disabled_idx;
2033 if (entry2->str_idx > str_idx)
2034 break;
2035 entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
2038 if (local_sctx.sifted_states == NULL)
2040 local_sctx = *sctx;
2041 err = re_node_set_init_copy (&local_sctx.limits,
2042 &sctx->limits);
2043 if (BE (err != REG_NOERROR, 0))
2044 goto free_return;
2046 local_sctx.last_node = node;
2047 local_sctx.last_str_idx = str_idx;
2048 err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2049 if (BE (err < 0, 0))
2051 err = REG_ESPACE;
2052 goto free_return;
2054 cur_state = local_sctx.sifted_states[str_idx];
2055 err = sift_states_backward (mctx, &local_sctx);
2056 if (BE (err != REG_NOERROR, 0))
2057 goto free_return;
2058 if (sctx->limited_states != NULL)
2060 err = merge_state_array (dfa, sctx->limited_states,
2061 local_sctx.sifted_states,
2062 str_idx + 1);
2063 if (BE (err != REG_NOERROR, 0))
2064 goto free_return;
2066 local_sctx.sifted_states[str_idx] = cur_state;
2067 re_node_set_remove (&local_sctx.limits, enabled_idx);
2068 /* We must not use the variable entry here, since
2069 mctx->bkref_ents might be realloced. */
2070 mctx->bkref_ents[enabled_idx].flag = 1;
2073 enabled_idx = search_cur_bkref_entry (mctx, str_idx);
2074 for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2076 struct re_backref_cache_entry *entry;
2077 entry = mctx->bkref_ents + enabled_idx;
2078 if (entry->str_idx > str_idx)
2079 break;
2080 if (entry->node == node)
2081 entry->flag = 0;
2085 err = REG_NOERROR;
2086 free_return:
2087 if (local_sctx.sifted_states != NULL)
2089 re_node_set_free (&local_sctx.limits);
2092 return err;
2096 #ifdef RE_ENABLE_I18N
2097 static int
2098 sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
2099 const re_match_context_t *mctx;
2100 re_sift_context_t *sctx;
2101 int node_idx, str_idx, max_str_idx;
2103 re_dfa_t *const dfa = mctx->dfa;
2104 int naccepted;
2105 /* Check the node can accept `multi byte'. */
2106 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2107 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2108 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2109 dfa->nexts[node_idx]))
2110 /* The node can't accept the `multi byte', or the
2111 destination was already thrown away, then the node
2112 could't accept the current input `multi byte'. */
2113 naccepted = 0;
2114 /* Otherwise, it is sure that the node could accept
2115 `naccepted' bytes input. */
2116 return naccepted;
2118 #endif /* RE_ENABLE_I18N */
2121 /* Functions for state transition. */
2123 /* Return the next state to which the current state STATE will transit by
2124 accepting the current input byte, and update STATE_LOG if necessary.
2125 If STATE can accept a multibyte char/collating element/back reference
2126 update the destination of STATE_LOG. */
2128 static re_dfastate_t *
2129 transit_state (err, mctx, state)
2130 reg_errcode_t *err;
2131 re_match_context_t *mctx;
2132 re_dfastate_t *state;
2134 re_dfa_t *const dfa = mctx->dfa;
2135 re_dfastate_t **trtable, *next_state;
2136 unsigned char ch;
2137 int cur_idx;
2139 if (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.bufs_len
2140 || (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.valid_len
2141 && mctx->input.valid_len < mctx->input.len))
2143 *err = extend_buffers (mctx);
2144 if (BE (*err != REG_NOERROR, 0))
2145 return NULL;
2148 *err = REG_NOERROR;
2149 if (state == NULL)
2151 next_state = state;
2152 re_string_skip_bytes (&mctx->input, 1);
2154 else
2156 #ifdef RE_ENABLE_I18N
2157 /* If the current state can accept multibyte. */
2158 if (state->accept_mb)
2160 *err = transit_state_mb (mctx, state);
2161 if (BE (*err != REG_NOERROR, 0))
2162 return NULL;
2164 #endif /* RE_ENABLE_I18N */
2166 /* Then decide the next state with the single byte. */
2167 if (1)
2169 /* Use transition table */
2170 ch = re_string_fetch_byte (&mctx->input);
2171 trtable = state->trtable;
2172 if (trtable == NULL)
2174 trtable = build_trtable (dfa, state);
2175 if (trtable == NULL)
2177 *err = REG_ESPACE;
2178 return NULL;
2181 if (BE (state->word_trtable, 0))
2183 unsigned int context;
2184 context
2185 = re_string_context_at (&mctx->input,
2186 re_string_cur_idx (&mctx->input) - 1,
2187 mctx->eflags);
2188 if (IS_WORD_CONTEXT (context))
2189 next_state = trtable[ch + SBC_MAX];
2190 else
2191 next_state = trtable[ch];
2193 else
2194 next_state = trtable[ch];
2196 #if 0
2197 else
2199 /* don't use transition table */
2200 next_state = transit_state_sb (err, mctx, state);
2201 if (BE (next_state == NULL && err != REG_NOERROR, 0))
2202 return NULL;
2204 #endif
2207 cur_idx = re_string_cur_idx (&mctx->input);
2208 /* Update the state_log if we need. */
2209 if (mctx->state_log != NULL)
2211 if (cur_idx > mctx->state_log_top)
2213 mctx->state_log[cur_idx] = next_state;
2214 mctx->state_log_top = cur_idx;
2216 else if (mctx->state_log[cur_idx] == 0)
2218 mctx->state_log[cur_idx] = next_state;
2220 else
2222 re_dfastate_t *pstate;
2223 unsigned int context;
2224 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2225 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2226 the destination of a multibyte char/collating element/
2227 back reference. Then the next state is the union set of
2228 these destinations and the results of the transition table. */
2229 pstate = mctx->state_log[cur_idx];
2230 log_nodes = pstate->entrance_nodes;
2231 if (next_state != NULL)
2233 table_nodes = next_state->entrance_nodes;
2234 *err = re_node_set_init_union (&next_nodes, table_nodes,
2235 log_nodes);
2236 if (BE (*err != REG_NOERROR, 0))
2237 return NULL;
2239 else
2240 next_nodes = *log_nodes;
2241 /* Note: We already add the nodes of the initial state,
2242 then we don't need to add them here. */
2244 context = re_string_context_at (&mctx->input,
2245 re_string_cur_idx (&mctx->input) - 1,
2246 mctx->eflags);
2247 next_state = mctx->state_log[cur_idx]
2248 = re_acquire_state_context (err, dfa, &next_nodes, context);
2249 /* We don't need to check errors here, since the return value of
2250 this function is next_state and ERR is already set. */
2252 if (table_nodes != NULL)
2253 re_node_set_free (&next_nodes);
2257 if (BE (dfa->nbackref, 0) && next_state != NULL)
2259 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2260 later. We must check them here, since the back references in the
2261 next state might use them. */
2262 *err = check_subexp_matching_top (mctx, &next_state->nodes,
2263 cur_idx);
2264 if (BE (*err != REG_NOERROR, 0))
2265 return NULL;
2267 /* If the next state has back references. */
2268 if (next_state->has_backref)
2270 *err = transit_state_bkref (mctx, &next_state->nodes);
2271 if (BE (*err != REG_NOERROR, 0))
2272 return NULL;
2273 next_state = mctx->state_log[cur_idx];
2276 return next_state;
2279 /* Helper functions for transit_state. */
2281 /* From the node set CUR_NODES, pick up the nodes whose types are
2282 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2283 expression. And register them to use them later for evaluating the
2284 correspoding back references. */
2286 static reg_errcode_t
2287 check_subexp_matching_top (mctx, cur_nodes, str_idx)
2288 re_match_context_t *mctx;
2289 re_node_set *cur_nodes;
2290 int str_idx;
2292 re_dfa_t *const dfa = mctx->dfa;
2293 int node_idx;
2294 reg_errcode_t err;
2296 /* TODO: This isn't efficient.
2297 Because there might be more than one nodes whose types are
2298 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2299 nodes.
2300 E.g. RE: (a){2} */
2301 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2303 int node = cur_nodes->elems[node_idx];
2304 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2305 && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2306 && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2308 err = match_ctx_add_subtop (mctx, node, str_idx);
2309 if (BE (err != REG_NOERROR, 0))
2310 return err;
2313 return REG_NOERROR;
2316 #if 0
2317 /* Return the next state to which the current state STATE will transit by
2318 accepting the current input byte. */
2320 static re_dfastate_t *
2321 transit_state_sb (err, mctx, state)
2322 reg_errcode_t *err;
2323 re_match_context_t *mctx;
2324 re_dfastate_t *state;
2326 re_dfa_t *const dfa = mctx->dfa;
2327 re_node_set next_nodes;
2328 re_dfastate_t *next_state;
2329 int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2330 unsigned int context;
2332 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2333 if (BE (*err != REG_NOERROR, 0))
2334 return NULL;
2335 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2337 int cur_node = state->nodes.elems[node_cnt];
2338 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2340 *err = re_node_set_merge (&next_nodes,
2341 dfa->eclosures + dfa->nexts[cur_node]);
2342 if (BE (*err != REG_NOERROR, 0))
2344 re_node_set_free (&next_nodes);
2345 return NULL;
2349 context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2350 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2351 /* We don't need to check errors here, since the return value of
2352 this function is next_state and ERR is already set. */
2354 re_node_set_free (&next_nodes);
2355 re_string_skip_bytes (&mctx->input, 1);
2356 return next_state;
2358 #endif
2360 #ifdef RE_ENABLE_I18N
2361 static reg_errcode_t
2362 transit_state_mb (mctx, pstate)
2363 re_match_context_t *mctx;
2364 re_dfastate_t *pstate;
2366 re_dfa_t *const dfa = mctx->dfa;
2367 reg_errcode_t err;
2368 int i;
2370 for (i = 0; i < pstate->nodes.nelem; ++i)
2372 re_node_set dest_nodes, *new_nodes;
2373 int cur_node_idx = pstate->nodes.elems[i];
2374 int naccepted = 0, dest_idx;
2375 unsigned int context;
2376 re_dfastate_t *dest_state;
2378 if (dfa->nodes[cur_node_idx].constraint)
2380 context = re_string_context_at (&mctx->input,
2381 re_string_cur_idx (&mctx->input),
2382 mctx->eflags);
2383 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2384 context))
2385 continue;
2388 /* How many bytes the node can accept? */
2389 if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2390 naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2391 re_string_cur_idx (&mctx->input));
2392 if (naccepted == 0)
2393 continue;
2395 /* The node can accepts `naccepted' bytes. */
2396 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2397 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2398 : mctx->max_mb_elem_len);
2399 err = clean_state_log_if_needed (mctx, dest_idx);
2400 if (BE (err != REG_NOERROR, 0))
2401 return err;
2402 #ifdef DEBUG
2403 assert (dfa->nexts[cur_node_idx] != -1);
2404 #endif
2405 /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2406 then we use pstate->nodes.elems[i] instead. */
2407 new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2409 dest_state = mctx->state_log[dest_idx];
2410 if (dest_state == NULL)
2411 dest_nodes = *new_nodes;
2412 else
2414 err = re_node_set_init_union (&dest_nodes,
2415 dest_state->entrance_nodes, new_nodes);
2416 if (BE (err != REG_NOERROR, 0))
2417 return err;
2419 context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2420 mctx->state_log[dest_idx]
2421 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2422 if (dest_state != NULL)
2423 re_node_set_free (&dest_nodes);
2424 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2425 return err;
2427 return REG_NOERROR;
2429 #endif /* RE_ENABLE_I18N */
2431 static reg_errcode_t
2432 transit_state_bkref (mctx, nodes)
2433 re_match_context_t *mctx;
2434 const re_node_set *nodes;
2436 re_dfa_t *const dfa = mctx->dfa;
2437 reg_errcode_t err;
2438 int i;
2439 int cur_str_idx = re_string_cur_idx (&mctx->input);
2441 for (i = 0; i < nodes->nelem; ++i)
2443 int dest_str_idx, prev_nelem, bkc_idx;
2444 int node_idx = nodes->elems[i];
2445 unsigned int context;
2446 const re_token_t *node = dfa->nodes + node_idx;
2447 re_node_set *new_dest_nodes;
2449 /* Check whether `node' is a backreference or not. */
2450 if (node->type != OP_BACK_REF)
2451 continue;
2453 if (node->constraint)
2455 context = re_string_context_at (&mctx->input, cur_str_idx,
2456 mctx->eflags);
2457 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2458 continue;
2461 /* `node' is a backreference.
2462 Check the substring which the substring matched. */
2463 bkc_idx = mctx->nbkref_ents;
2464 err = get_subexp (mctx, node_idx, cur_str_idx);
2465 if (BE (err != REG_NOERROR, 0))
2466 goto free_return;
2468 /* And add the epsilon closures (which is `new_dest_nodes') of
2469 the backreference to appropriate state_log. */
2470 #ifdef DEBUG
2471 assert (dfa->nexts[node_idx] != -1);
2472 #endif
2473 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2475 int subexp_len;
2476 re_dfastate_t *dest_state;
2477 struct re_backref_cache_entry *bkref_ent;
2478 bkref_ent = mctx->bkref_ents + bkc_idx;
2479 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2480 continue;
2481 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2482 new_dest_nodes = (subexp_len == 0
2483 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2484 : dfa->eclosures + dfa->nexts[node_idx]);
2485 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2486 - bkref_ent->subexp_from);
2487 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2488 mctx->eflags);
2489 dest_state = mctx->state_log[dest_str_idx];
2490 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2491 : mctx->state_log[cur_str_idx]->nodes.nelem);
2492 /* Add `new_dest_node' to state_log. */
2493 if (dest_state == NULL)
2495 mctx->state_log[dest_str_idx]
2496 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2497 context);
2498 if (BE (mctx->state_log[dest_str_idx] == NULL
2499 && err != REG_NOERROR, 0))
2500 goto free_return;
2502 else
2504 re_node_set dest_nodes;
2505 err = re_node_set_init_union (&dest_nodes,
2506 dest_state->entrance_nodes,
2507 new_dest_nodes);
2508 if (BE (err != REG_NOERROR, 0))
2510 re_node_set_free (&dest_nodes);
2511 goto free_return;
2513 mctx->state_log[dest_str_idx]
2514 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2515 re_node_set_free (&dest_nodes);
2516 if (BE (mctx->state_log[dest_str_idx] == NULL
2517 && err != REG_NOERROR, 0))
2518 goto free_return;
2520 /* We need to check recursively if the backreference can epsilon
2521 transit. */
2522 if (subexp_len == 0
2523 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2525 err = check_subexp_matching_top (mctx, new_dest_nodes,
2526 cur_str_idx);
2527 if (BE (err != REG_NOERROR, 0))
2528 goto free_return;
2529 err = transit_state_bkref (mctx, new_dest_nodes);
2530 if (BE (err != REG_NOERROR, 0))
2531 goto free_return;
2535 err = REG_NOERROR;
2536 free_return:
2537 return err;
2540 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2541 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2542 Note that we might collect inappropriate candidates here.
2543 However, the cost of checking them strictly here is too high, then we
2544 delay these checking for prune_impossible_nodes(). */
2546 static reg_errcode_t
2547 get_subexp (mctx, bkref_node, bkref_str_idx)
2548 re_match_context_t *mctx;
2549 int bkref_node, bkref_str_idx;
2551 re_dfa_t *const dfa = mctx->dfa;
2552 int subexp_num, sub_top_idx;
2553 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2554 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2555 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2556 for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
2558 const struct re_backref_cache_entry *entry
2559 = &mctx->bkref_ents[cache_idx];
2560 if (entry->str_idx > bkref_str_idx)
2561 break;
2562 if (entry->node == bkref_node)
2563 return REG_NOERROR; /* We already checked it. */
2565 subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
2567 /* For each sub expression */
2568 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2570 reg_errcode_t err;
2571 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2572 re_sub_match_last_t *sub_last;
2573 int sub_last_idx, sl_str, bkref_str_off;
2575 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2576 continue; /* It isn't related. */
2578 sl_str = sub_top->str_idx;
2579 bkref_str_off = bkref_str_idx;
2580 /* At first, check the last node of sub expressions we already
2581 evaluated. */
2582 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2584 int sl_str_diff;
2585 sub_last = sub_top->lasts[sub_last_idx];
2586 sl_str_diff = sub_last->str_idx - sl_str;
2587 /* The matched string by the sub expression match with the substring
2588 at the back reference? */
2589 if (sl_str_diff > 0)
2591 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2593 /* Not enough chars for a successful match. */
2594 if (bkref_str_off + sl_str_diff > mctx->input.len)
2595 break;
2597 err = clean_state_log_if_needed (mctx,
2598 bkref_str_off
2599 + sl_str_diff);
2600 if (BE (err != REG_NOERROR, 0))
2601 return err;
2602 buf = (const char *) re_string_get_buffer (&mctx->input);
2604 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2605 break; /* We don't need to search this sub expression any more. */
2607 bkref_str_off += sl_str_diff;
2608 sl_str += sl_str_diff;
2609 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2610 bkref_str_idx);
2612 /* Reload buf, since the preceding call might have reallocated
2613 the buffer. */
2614 buf = (const char *) re_string_get_buffer (&mctx->input);
2616 if (err == REG_NOMATCH)
2617 continue;
2618 if (BE (err != REG_NOERROR, 0))
2619 return err;
2622 if (sub_last_idx < sub_top->nlasts)
2623 continue;
2624 if (sub_last_idx > 0)
2625 ++sl_str;
2626 /* Then, search for the other last nodes of the sub expression. */
2627 for (; sl_str <= bkref_str_idx; ++sl_str)
2629 int cls_node, sl_str_off;
2630 const re_node_set *nodes;
2631 sl_str_off = sl_str - sub_top->str_idx;
2632 /* The matched string by the sub expression match with the substring
2633 at the back reference? */
2634 if (sl_str_off > 0)
2636 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2638 /* If we are at the end of the input, we cannot match. */
2639 if (bkref_str_off >= mctx->input.len)
2640 break;
2642 err = extend_buffers (mctx);
2643 if (BE (err != REG_NOERROR, 0))
2644 return err;
2646 buf = (const char *) re_string_get_buffer (&mctx->input);
2648 if (buf [bkref_str_off++] != buf[sl_str - 1])
2649 break; /* We don't need to search this sub expression
2650 any more. */
2652 if (mctx->state_log[sl_str] == NULL)
2653 continue;
2654 /* Does this state have a ')' of the sub expression? */
2655 nodes = &mctx->state_log[sl_str]->nodes;
2656 cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2657 if (cls_node == -1)
2658 continue; /* No. */
2659 if (sub_top->path == NULL)
2661 sub_top->path = calloc (sizeof (state_array_t),
2662 sl_str - sub_top->str_idx + 1);
2663 if (sub_top->path == NULL)
2664 return REG_ESPACE;
2666 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2667 in the current context? */
2668 err = check_arrival (mctx, sub_top->path, sub_top->node,
2669 sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2670 if (err == REG_NOMATCH)
2671 continue;
2672 if (BE (err != REG_NOERROR, 0))
2673 return err;
2674 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2675 if (BE (sub_last == NULL, 0))
2676 return REG_ESPACE;
2677 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2678 bkref_str_idx);
2679 if (err == REG_NOMATCH)
2680 continue;
2683 return REG_NOERROR;
2686 /* Helper functions for get_subexp(). */
2688 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2689 If it can arrive, register the sub expression expressed with SUB_TOP
2690 and SUB_LAST. */
2692 static reg_errcode_t
2693 get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
2694 re_match_context_t *mctx;
2695 const re_sub_match_top_t *sub_top;
2696 re_sub_match_last_t *sub_last;
2697 int bkref_node, bkref_str;
2699 reg_errcode_t err;
2700 int to_idx;
2701 /* Can the subexpression arrive the back reference? */
2702 err = check_arrival (mctx, &sub_last->path, sub_last->node,
2703 sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2704 if (err != REG_NOERROR)
2705 return err;
2706 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2707 sub_last->str_idx);
2708 if (BE (err != REG_NOERROR, 0))
2709 return err;
2710 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2711 return clean_state_log_if_needed (mctx, to_idx);
2714 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2715 Search '(' if FL_OPEN, or search ')' otherwise.
2716 TODO: This function isn't efficient...
2717 Because there might be more than one nodes whose types are
2718 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2719 nodes.
2720 E.g. RE: (a){2} */
2722 static int
2723 find_subexp_node (dfa, nodes, subexp_idx, type)
2724 const re_dfa_t *dfa;
2725 const re_node_set *nodes;
2726 int subexp_idx, type;
2728 int cls_idx;
2729 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2731 int cls_node = nodes->elems[cls_idx];
2732 const re_token_t *node = dfa->nodes + cls_node;
2733 if (node->type == type
2734 && node->opr.idx == subexp_idx)
2735 return cls_node;
2737 return -1;
2740 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2741 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2742 heavily reused.
2743 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2745 static reg_errcode_t
2746 check_arrival (mctx, path, top_node, top_str, last_node, last_str,
2747 type)
2748 re_match_context_t *mctx;
2749 state_array_t *path;
2750 int top_node, top_str, last_node, last_str, type;
2752 re_dfa_t *const dfa = mctx->dfa;
2753 reg_errcode_t err;
2754 int subexp_num, backup_cur_idx, str_idx, null_cnt;
2755 re_dfastate_t *cur_state = NULL;
2756 re_node_set *cur_nodes, next_nodes;
2757 re_dfastate_t **backup_state_log;
2758 unsigned int context;
2760 subexp_num = dfa->nodes[top_node].opr.idx;
2761 /* Extend the buffer if we need. */
2762 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2764 re_dfastate_t **new_array;
2765 int old_alloc = path->alloc;
2766 path->alloc += last_str + mctx->max_mb_elem_len + 1;
2767 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2768 if (new_array == NULL)
2770 path->alloc = old_alloc;
2771 return REG_ESPACE;
2773 path->array = new_array;
2774 memset (new_array + old_alloc, '\0',
2775 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2778 str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2780 /* Temporary modify MCTX. */
2781 backup_state_log = mctx->state_log;
2782 backup_cur_idx = mctx->input.cur_idx;
2783 mctx->state_log = path->array;
2784 mctx->input.cur_idx = str_idx;
2786 /* Setup initial node set. */
2787 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2788 if (str_idx == top_str)
2790 err = re_node_set_init_1 (&next_nodes, top_node);
2791 if (BE (err != REG_NOERROR, 0))
2792 return err;
2793 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2794 if (BE (err != REG_NOERROR, 0))
2796 re_node_set_free (&next_nodes);
2797 return err;
2800 else
2802 cur_state = mctx->state_log[str_idx];
2803 if (cur_state && cur_state->has_backref)
2805 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2806 if (BE ( err != REG_NOERROR, 0))
2807 return err;
2809 else
2810 re_node_set_init_empty (&next_nodes);
2812 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2814 if (next_nodes.nelem)
2816 err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
2817 subexp_num, type);
2818 if (BE ( err != REG_NOERROR, 0))
2820 re_node_set_free (&next_nodes);
2821 return err;
2824 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2825 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2827 re_node_set_free (&next_nodes);
2828 return err;
2830 mctx->state_log[str_idx] = cur_state;
2833 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2835 re_node_set_empty (&next_nodes);
2836 if (mctx->state_log[str_idx + 1])
2838 err = re_node_set_merge (&next_nodes,
2839 &mctx->state_log[str_idx + 1]->nodes);
2840 if (BE (err != REG_NOERROR, 0))
2842 re_node_set_free (&next_nodes);
2843 return err;
2846 if (cur_state)
2848 err = check_arrival_add_next_nodes (mctx, str_idx,
2849 &cur_state->nodes, &next_nodes);
2850 if (BE (err != REG_NOERROR, 0))
2852 re_node_set_free (&next_nodes);
2853 return err;
2856 ++str_idx;
2857 if (next_nodes.nelem)
2859 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2860 if (BE (err != REG_NOERROR, 0))
2862 re_node_set_free (&next_nodes);
2863 return err;
2865 err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
2866 subexp_num, type);
2867 if (BE ( err != REG_NOERROR, 0))
2869 re_node_set_free (&next_nodes);
2870 return err;
2873 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2874 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2875 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2877 re_node_set_free (&next_nodes);
2878 return err;
2880 mctx->state_log[str_idx] = cur_state;
2881 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2883 re_node_set_free (&next_nodes);
2884 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2885 : &mctx->state_log[last_str]->nodes);
2886 path->next_idx = str_idx;
2888 /* Fix MCTX. */
2889 mctx->state_log = backup_state_log;
2890 mctx->input.cur_idx = backup_cur_idx;
2892 /* Then check the current node set has the node LAST_NODE. */
2893 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2894 return REG_NOERROR;
2896 return REG_NOMATCH;
2899 /* Helper functions for check_arrival. */
2901 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2902 to NEXT_NODES.
2903 TODO: This function is similar to the functions transit_state*(),
2904 however this function has many additional works.
2905 Can't we unify them? */
2907 static reg_errcode_t
2908 check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
2909 re_match_context_t *mctx;
2910 int str_idx;
2911 re_node_set *cur_nodes, *next_nodes;
2913 re_dfa_t *const dfa = mctx->dfa;
2914 int cur_idx;
2915 reg_errcode_t err;
2916 re_node_set union_set;
2917 re_node_set_init_empty (&union_set);
2918 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2920 int naccepted = 0;
2921 int cur_node = cur_nodes->elems[cur_idx];
2922 re_token_type_t type = dfa->nodes[cur_node].type;
2923 if (IS_EPSILON_NODE (type))
2924 continue;
2925 #ifdef RE_ENABLE_I18N
2926 /* If the node may accept `multi byte'. */
2927 if (ACCEPT_MB_NODE (type))
2929 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
2930 str_idx);
2931 if (naccepted > 1)
2933 re_dfastate_t *dest_state;
2934 int next_node = dfa->nexts[cur_node];
2935 int next_idx = str_idx + naccepted;
2936 dest_state = mctx->state_log[next_idx];
2937 re_node_set_empty (&union_set);
2938 if (dest_state)
2940 err = re_node_set_merge (&union_set, &dest_state->nodes);
2941 if (BE (err != REG_NOERROR, 0))
2943 re_node_set_free (&union_set);
2944 return err;
2947 err = re_node_set_insert (&union_set, next_node);
2948 if (BE (err < 0, 0))
2950 re_node_set_free (&union_set);
2951 return REG_ESPACE;
2953 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
2954 &union_set);
2955 if (BE (mctx->state_log[next_idx] == NULL
2956 && err != REG_NOERROR, 0))
2958 re_node_set_free (&union_set);
2959 return err;
2963 #endif /* RE_ENABLE_I18N */
2964 if (naccepted
2965 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
2967 err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
2968 if (BE (err < 0, 0))
2970 re_node_set_free (&union_set);
2971 return REG_ESPACE;
2975 re_node_set_free (&union_set);
2976 return REG_NOERROR;
2979 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
2980 CUR_NODES, however exclude the nodes which are:
2981 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
2982 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
2985 static reg_errcode_t
2986 check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
2987 re_dfa_t *dfa;
2988 re_node_set *cur_nodes;
2989 int ex_subexp, type;
2991 reg_errcode_t err;
2992 int idx, outside_node;
2993 re_node_set new_nodes;
2994 #ifdef DEBUG
2995 assert (cur_nodes->nelem);
2996 #endif
2997 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
2998 if (BE (err != REG_NOERROR, 0))
2999 return err;
3000 /* Create a new node set NEW_NODES with the nodes which are epsilon
3001 closures of the node in CUR_NODES. */
3003 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3005 int cur_node = cur_nodes->elems[idx];
3006 re_node_set *eclosure = dfa->eclosures + cur_node;
3007 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3008 if (outside_node == -1)
3010 /* There are no problematic nodes, just merge them. */
3011 err = re_node_set_merge (&new_nodes, eclosure);
3012 if (BE (err != REG_NOERROR, 0))
3014 re_node_set_free (&new_nodes);
3015 return err;
3018 else
3020 /* There are problematic nodes, re-calculate incrementally. */
3021 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3022 ex_subexp, type);
3023 if (BE (err != REG_NOERROR, 0))
3025 re_node_set_free (&new_nodes);
3026 return err;
3030 re_node_set_free (cur_nodes);
3031 *cur_nodes = new_nodes;
3032 return REG_NOERROR;
3035 /* Helper function for check_arrival_expand_ecl.
3036 Check incrementally the epsilon closure of TARGET, and if it isn't
3037 problematic append it to DST_NODES. */
3039 static reg_errcode_t
3040 check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
3041 re_dfa_t *dfa;
3042 int target, ex_subexp, type;
3043 re_node_set *dst_nodes;
3045 int cur_node;
3046 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3048 int err;
3050 if (dfa->nodes[cur_node].type == type
3051 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3053 if (type == OP_CLOSE_SUBEXP)
3055 err = re_node_set_insert (dst_nodes, cur_node);
3056 if (BE (err == -1, 0))
3057 return REG_ESPACE;
3059 break;
3061 err = re_node_set_insert (dst_nodes, cur_node);
3062 if (BE (err == -1, 0))
3063 return REG_ESPACE;
3064 if (dfa->edests[cur_node].nelem == 0)
3065 break;
3066 if (dfa->edests[cur_node].nelem == 2)
3068 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3069 dfa->edests[cur_node].elems[1],
3070 ex_subexp, type);
3071 if (BE (err != REG_NOERROR, 0))
3072 return err;
3074 cur_node = dfa->edests[cur_node].elems[0];
3076 return REG_NOERROR;
3080 /* For all the back references in the current state, calculate the
3081 destination of the back references by the appropriate entry
3082 in MCTX->BKREF_ENTS. */
3084 static reg_errcode_t
3085 expand_bkref_cache (mctx, cur_nodes, cur_str, last_str, subexp_num,
3086 type)
3087 re_match_context_t *mctx;
3088 int cur_str, last_str, subexp_num, type;
3089 re_node_set *cur_nodes;
3091 re_dfa_t *const dfa = mctx->dfa;
3092 reg_errcode_t err;
3093 int cache_idx, cache_idx_start;
3094 /* The current state. */
3096 cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3097 for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
3099 int to_idx, next_node;
3100 struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
3101 if (ent->str_idx > cur_str)
3102 break;
3103 /* Is this entry ENT is appropriate? */
3104 if (!re_node_set_contains (cur_nodes, ent->node))
3105 continue; /* No. */
3107 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3108 /* Calculate the destination of the back reference, and append it
3109 to MCTX->STATE_LOG. */
3110 if (to_idx == cur_str)
3112 /* The backreference did epsilon transit, we must re-check all the
3113 node in the current state. */
3114 re_node_set new_dests;
3115 reg_errcode_t err2, err3;
3116 next_node = dfa->edests[ent->node].elems[0];
3117 if (re_node_set_contains (cur_nodes, next_node))
3118 continue;
3119 err = re_node_set_init_1 (&new_dests, next_node);
3120 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3121 err3 = re_node_set_merge (cur_nodes, &new_dests);
3122 re_node_set_free (&new_dests);
3123 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3124 || err3 != REG_NOERROR, 0))
3126 err = (err != REG_NOERROR ? err
3127 : (err2 != REG_NOERROR ? err2 : err3));
3128 return err;
3130 /* TODO: It is still inefficient... */
3131 cache_idx = cache_idx_start - 1;
3132 continue;
3134 else
3136 re_node_set union_set;
3137 next_node = dfa->nexts[ent->node];
3138 if (mctx->state_log[to_idx])
3140 int ret;
3141 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3142 next_node))
3143 continue;
3144 err = re_node_set_init_copy (&union_set,
3145 &mctx->state_log[to_idx]->nodes);
3146 ret = re_node_set_insert (&union_set, next_node);
3147 if (BE (err != REG_NOERROR || ret < 0, 0))
3149 re_node_set_free (&union_set);
3150 err = err != REG_NOERROR ? err : REG_ESPACE;
3151 return err;
3154 else
3156 err = re_node_set_init_1 (&union_set, next_node);
3157 if (BE (err != REG_NOERROR, 0))
3158 return err;
3160 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3161 re_node_set_free (&union_set);
3162 if (BE (mctx->state_log[to_idx] == NULL
3163 && err != REG_NOERROR, 0))
3164 return err;
3167 return REG_NOERROR;
3170 /* Build transition table for the state.
3171 Return the new table if succeeded, otherwise return NULL. */
3173 static re_dfastate_t **
3174 build_trtable (dfa, state)
3175 re_dfa_t *dfa;
3176 re_dfastate_t *state;
3178 reg_errcode_t err;
3179 int i, j, ch;
3180 unsigned int elem, mask;
3181 int dests_node_malloced = 0, dest_states_malloced = 0;
3182 int ndests; /* Number of the destination states from `state'. */
3183 re_dfastate_t **trtable;
3184 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3185 re_node_set follows, *dests_node;
3186 bitset *dests_ch;
3187 bitset acceptable;
3189 /* We build DFA states which corresponds to the destination nodes
3190 from `state'. `dests_node[i]' represents the nodes which i-th
3191 destination state contains, and `dests_ch[i]' represents the
3192 characters which i-th destination state accepts. */
3193 #ifdef _LIBC
3194 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3195 dests_node = (re_node_set *)
3196 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3197 else
3198 #endif
3200 dests_node = (re_node_set *)
3201 malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3202 if (BE (dests_node == NULL, 0))
3203 return NULL;
3204 dests_node_malloced = 1;
3206 dests_ch = (bitset *) (dests_node + SBC_MAX);
3208 /* Initialize transiton table. */
3209 state->word_trtable = 0;
3211 /* At first, group all nodes belonging to `state' into several
3212 destinations. */
3213 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3214 if (BE (ndests <= 0, 0))
3216 if (dests_node_malloced)
3217 free (dests_node);
3218 /* Return NULL in case of an error, trtable otherwise. */
3219 if (ndests == 0)
3221 state->trtable = (re_dfastate_t **)
3222 calloc (sizeof (re_dfastate_t *), SBC_MAX);;
3223 return state->trtable;
3225 return NULL;
3228 err = re_node_set_alloc (&follows, ndests + 1);
3229 if (BE (err != REG_NOERROR, 0))
3230 goto out_free;
3232 #ifdef _LIBC
3233 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3234 + ndests * 3 * sizeof (re_dfastate_t *)))
3235 dest_states = (re_dfastate_t **)
3236 alloca (ndests * 3 * sizeof (re_dfastate_t *));
3237 else
3238 #endif
3240 dest_states = (re_dfastate_t **)
3241 malloc (ndests * 3 * sizeof (re_dfastate_t *));
3242 if (BE (dest_states == NULL, 0))
3244 out_free:
3245 if (dest_states_malloced)
3246 free (dest_states);
3247 re_node_set_free (&follows);
3248 for (i = 0; i < ndests; ++i)
3249 re_node_set_free (dests_node + i);
3250 if (dests_node_malloced)
3251 free (dests_node);
3252 return NULL;
3254 dest_states_malloced = 1;
3256 dest_states_word = dest_states + ndests;
3257 dest_states_nl = dest_states_word + ndests;
3258 bitset_empty (acceptable);
3260 /* Then build the states for all destinations. */
3261 for (i = 0; i < ndests; ++i)
3263 int next_node;
3264 re_node_set_empty (&follows);
3265 /* Merge the follows of this destination states. */
3266 for (j = 0; j < dests_node[i].nelem; ++j)
3268 next_node = dfa->nexts[dests_node[i].elems[j]];
3269 if (next_node != -1)
3271 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3272 if (BE (err != REG_NOERROR, 0))
3273 goto out_free;
3276 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3277 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3278 goto out_free;
3279 /* If the new state has context constraint,
3280 build appropriate states for these contexts. */
3281 if (dest_states[i]->has_constraint)
3283 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3284 CONTEXT_WORD);
3285 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3286 goto out_free;
3288 if (dest_states[i] != dest_states_word[i]
3289 && dfa->mb_cur_max > 1)
3290 state->word_trtable = 1;
3292 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3293 CONTEXT_NEWLINE);
3294 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3295 goto out_free;
3297 else
3299 dest_states_word[i] = dest_states[i];
3300 dest_states_nl[i] = dest_states[i];
3302 bitset_merge (acceptable, dests_ch[i]);
3305 if (!BE (state->word_trtable, 0))
3307 /* We don't care about whether the following character is a word
3308 character, or we are in a single-byte character set so we can
3309 discern by looking at the character code: allocate a
3310 256-entry transition table. */
3311 trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3312 if (BE (trtable == NULL, 0))
3313 goto out_free;
3315 /* For all characters ch...: */
3316 for (i = 0; i < BITSET_UINTS; ++i)
3317 for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3318 elem;
3319 mask <<= 1, elem >>= 1, ++ch)
3320 if (BE (elem & 1, 0))
3322 /* There must be exactly one destination which accepts
3323 character ch. See group_nodes_into_DFAstates. */
3324 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3327 /* j-th destination accepts the word character ch. */
3328 if (dfa->word_char[i] & mask)
3329 trtable[ch] = dest_states_word[j];
3330 else
3331 trtable[ch] = dest_states[j];
3334 else
3336 /* We care about whether the following character is a word
3337 character, and we are in a multi-byte character set: discern
3338 by looking at the character code: build two 256-entry
3339 transition tables, one starting at trtable[0] and one
3340 starting at trtable[SBC_MAX]. */
3341 trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
3342 2 * SBC_MAX);
3343 if (BE (trtable == NULL, 0))
3344 goto out_free;
3346 /* For all characters ch...: */
3347 for (i = 0; i < BITSET_UINTS; ++i)
3348 for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3349 elem;
3350 mask <<= 1, elem >>= 1, ++ch)
3351 if (BE (elem & 1, 0))
3353 /* There must be exactly one destination which accepts
3354 character ch. See group_nodes_into_DFAstates. */
3355 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3358 /* j-th destination accepts the word character ch. */
3359 trtable[ch] = dest_states[j];
3360 trtable[ch + SBC_MAX] = dest_states_word[j];
3364 /* new line */
3365 if (bitset_contain (acceptable, NEWLINE_CHAR))
3367 /* The current state accepts newline character. */
3368 for (j = 0; j < ndests; ++j)
3369 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3371 /* k-th destination accepts newline character. */
3372 trtable[NEWLINE_CHAR] = dest_states_nl[j];
3373 if (state->word_trtable)
3374 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3375 /* There must be only one destination which accepts
3376 newline. See group_nodes_into_DFAstates. */
3377 break;
3381 if (dest_states_malloced)
3382 free (dest_states);
3384 re_node_set_free (&follows);
3385 for (i = 0; i < ndests; ++i)
3386 re_node_set_free (dests_node + i);
3388 if (dests_node_malloced)
3389 free (dests_node);
3391 state->trtable = trtable;
3392 return trtable;
3395 /* Group all nodes belonging to STATE into several destinations.
3396 Then for all destinations, set the nodes belonging to the destination
3397 to DESTS_NODE[i] and set the characters accepted by the destination
3398 to DEST_CH[i]. This function return the number of destinations. */
3400 static int
3401 group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
3402 re_dfa_t *dfa;
3403 const re_dfastate_t *state;
3404 re_node_set *dests_node;
3405 bitset *dests_ch;
3407 reg_errcode_t err;
3408 int i, j, k;
3409 int ndests; /* Number of the destinations from `state'. */
3410 bitset accepts; /* Characters a node can accept. */
3411 const re_node_set *cur_nodes = &state->nodes;
3412 bitset_empty (accepts);
3413 ndests = 0;
3415 /* For all the nodes belonging to `state', */
3416 for (i = 0; i < cur_nodes->nelem; ++i)
3418 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3419 re_token_type_t type = node->type;
3420 unsigned int constraint = node->constraint;
3422 /* Enumerate all single byte character this node can accept. */
3423 if (type == CHARACTER)
3424 bitset_set (accepts, node->opr.c);
3425 else if (type == SIMPLE_BRACKET)
3427 bitset_merge (accepts, node->opr.sbcset);
3429 else if (type == OP_PERIOD)
3431 #ifdef RE_ENABLE_I18N
3432 if (dfa->mb_cur_max > 1)
3433 bitset_merge (accepts, dfa->sb_char);
3434 else
3435 #endif
3436 bitset_set_all (accepts);
3437 if (!(dfa->syntax & RE_DOT_NEWLINE))
3438 bitset_clear (accepts, '\n');
3439 if (dfa->syntax & RE_DOT_NOT_NULL)
3440 bitset_clear (accepts, '\0');
3442 #ifdef RE_ENABLE_I18N
3443 else if (type == OP_UTF8_PERIOD)
3445 memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3446 if (!(dfa->syntax & RE_DOT_NEWLINE))
3447 bitset_clear (accepts, '\n');
3448 if (dfa->syntax & RE_DOT_NOT_NULL)
3449 bitset_clear (accepts, '\0');
3451 #endif
3452 else
3453 continue;
3455 /* Check the `accepts' and sift the characters which are not
3456 match it the context. */
3457 if (constraint)
3459 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3461 int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3462 bitset_empty (accepts);
3463 if (accepts_newline)
3464 bitset_set (accepts, NEWLINE_CHAR);
3465 else
3466 continue;
3468 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3470 bitset_empty (accepts);
3471 continue;
3474 if (constraint & NEXT_WORD_CONSTRAINT)
3476 unsigned int any_set = 0;
3477 if (type == CHARACTER && !node->word_char)
3479 bitset_empty (accepts);
3480 continue;
3482 #ifdef RE_ENABLE_I18N
3483 if (dfa->mb_cur_max > 1)
3484 for (j = 0; j < BITSET_UINTS; ++j)
3485 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3486 else
3487 #endif
3488 for (j = 0; j < BITSET_UINTS; ++j)
3489 any_set |= (accepts[j] &= dfa->word_char[j]);
3490 if (!any_set)
3491 continue;
3493 if (constraint & NEXT_NOTWORD_CONSTRAINT)
3495 unsigned int any_set = 0;
3496 if (type == CHARACTER && node->word_char)
3498 bitset_empty (accepts);
3499 continue;
3501 #ifdef RE_ENABLE_I18N
3502 if (dfa->mb_cur_max > 1)
3503 for (j = 0; j < BITSET_UINTS; ++j)
3504 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3505 else
3506 #endif
3507 for (j = 0; j < BITSET_UINTS; ++j)
3508 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3509 if (!any_set)
3510 continue;
3514 /* Then divide `accepts' into DFA states, or create a new
3515 state. Above, we make sure that accepts is not empty. */
3516 for (j = 0; j < ndests; ++j)
3518 bitset intersec; /* Intersection sets, see below. */
3519 bitset remains;
3520 /* Flags, see below. */
3521 int has_intersec, not_subset, not_consumed;
3523 /* Optimization, skip if this state doesn't accept the character. */
3524 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3525 continue;
3527 /* Enumerate the intersection set of this state and `accepts'. */
3528 has_intersec = 0;
3529 for (k = 0; k < BITSET_UINTS; ++k)
3530 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3531 /* And skip if the intersection set is empty. */
3532 if (!has_intersec)
3533 continue;
3535 /* Then check if this state is a subset of `accepts'. */
3536 not_subset = not_consumed = 0;
3537 for (k = 0; k < BITSET_UINTS; ++k)
3539 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3540 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3543 /* If this state isn't a subset of `accepts', create a
3544 new group state, which has the `remains'. */
3545 if (not_subset)
3547 bitset_copy (dests_ch[ndests], remains);
3548 bitset_copy (dests_ch[j], intersec);
3549 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3550 if (BE (err != REG_NOERROR, 0))
3551 goto error_return;
3552 ++ndests;
3555 /* Put the position in the current group. */
3556 err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3557 if (BE (err < 0, 0))
3558 goto error_return;
3560 /* If all characters are consumed, go to next node. */
3561 if (!not_consumed)
3562 break;
3564 /* Some characters remain, create a new group. */
3565 if (j == ndests)
3567 bitset_copy (dests_ch[ndests], accepts);
3568 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3569 if (BE (err != REG_NOERROR, 0))
3570 goto error_return;
3571 ++ndests;
3572 bitset_empty (accepts);
3575 return ndests;
3576 error_return:
3577 for (j = 0; j < ndests; ++j)
3578 re_node_set_free (dests_node + j);
3579 return -1;
3582 #ifdef RE_ENABLE_I18N
3583 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3584 Return the number of the bytes the node accepts.
3585 STR_IDX is the current index of the input string.
3587 This function handles the nodes which can accept one character, or
3588 one collating element like '.', '[a-z]', opposite to the other nodes
3589 can only accept one byte. */
3591 static int
3592 check_node_accept_bytes (dfa, node_idx, input, str_idx)
3593 re_dfa_t *dfa;
3594 int node_idx, str_idx;
3595 const re_string_t *input;
3597 const re_token_t *node = dfa->nodes + node_idx;
3598 int char_len, elem_len;
3599 int i;
3601 if (BE (node->type == OP_UTF8_PERIOD, 0))
3603 unsigned char c = re_string_byte_at (input, str_idx), d;
3604 if (BE (c < 0xc2, 1))
3605 return 0;
3607 if (str_idx + 2 > input->len)
3608 return 0;
3610 d = re_string_byte_at (input, str_idx + 1);
3611 if (c < 0xe0)
3612 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3613 else if (c < 0xf0)
3615 char_len = 3;
3616 if (c == 0xe0 && d < 0xa0)
3617 return 0;
3619 else if (c < 0xf8)
3621 char_len = 4;
3622 if (c == 0xf0 && d < 0x90)
3623 return 0;
3625 else if (c < 0xfc)
3627 char_len = 5;
3628 if (c == 0xf8 && d < 0x88)
3629 return 0;
3631 else if (c < 0xfe)
3633 char_len = 6;
3634 if (c == 0xfc && d < 0x84)
3635 return 0;
3637 else
3638 return 0;
3640 if (str_idx + char_len > input->len)
3641 return 0;
3643 for (i = 1; i < char_len; ++i)
3645 d = re_string_byte_at (input, str_idx + i);
3646 if (d < 0x80 || d > 0xbf)
3647 return 0;
3649 return char_len;
3652 char_len = re_string_char_size_at (input, str_idx);
3653 if (node->type == OP_PERIOD)
3655 if (char_len <= 1)
3656 return 0;
3657 /* FIXME: I don't think this if is needed, as both '\n'
3658 and '\0' are char_len == 1. */
3659 /* '.' accepts any one character except the following two cases. */
3660 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3661 re_string_byte_at (input, str_idx) == '\n') ||
3662 ((dfa->syntax & RE_DOT_NOT_NULL) &&
3663 re_string_byte_at (input, str_idx) == '\0'))
3664 return 0;
3665 return char_len;
3668 elem_len = re_string_elem_size_at (input, str_idx);
3669 if (elem_len <= 1 && char_len <= 1)
3670 return 0;
3672 if (node->type == COMPLEX_BRACKET)
3674 const re_charset_t *cset = node->opr.mbcset;
3675 # ifdef _LIBC
3676 const unsigned char *pin = ((char *) re_string_get_buffer (input)
3677 + str_idx);
3678 int j;
3679 uint32_t nrules;
3680 # endif /* _LIBC */
3681 int match_len = 0;
3682 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3683 ? re_string_wchar_at (input, str_idx) : 0);
3685 /* match with multibyte character? */
3686 for (i = 0; i < cset->nmbchars; ++i)
3687 if (wc == cset->mbchars[i])
3689 match_len = char_len;
3690 goto check_node_accept_bytes_match;
3692 /* match with character_class? */
3693 for (i = 0; i < cset->nchar_classes; ++i)
3695 wctype_t wt = cset->char_classes[i];
3696 if (__iswctype (wc, wt))
3698 match_len = char_len;
3699 goto check_node_accept_bytes_match;
3703 # ifdef _LIBC
3704 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3705 if (nrules != 0)
3707 unsigned int in_collseq = 0;
3708 const int32_t *table, *indirect;
3709 const unsigned char *weights, *extra;
3710 const char *collseqwc;
3711 int32_t idx;
3712 /* This #include defines a local function! */
3713 # include <locale/weight.h>
3715 /* match with collating_symbol? */
3716 if (cset->ncoll_syms)
3717 extra = (const unsigned char *)
3718 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3719 for (i = 0; i < cset->ncoll_syms; ++i)
3721 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3722 /* Compare the length of input collating element and
3723 the length of current collating element. */
3724 if (*coll_sym != elem_len)
3725 continue;
3726 /* Compare each bytes. */
3727 for (j = 0; j < *coll_sym; j++)
3728 if (pin[j] != coll_sym[1 + j])
3729 break;
3730 if (j == *coll_sym)
3732 /* Match if every bytes is equal. */
3733 match_len = j;
3734 goto check_node_accept_bytes_match;
3738 if (cset->nranges)
3740 if (elem_len <= char_len)
3742 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3743 in_collseq = __collseq_table_lookup (collseqwc, wc);
3745 else
3746 in_collseq = find_collation_sequence_value (pin, elem_len);
3748 /* match with range expression? */
3749 for (i = 0; i < cset->nranges; ++i)
3750 if (cset->range_starts[i] <= in_collseq
3751 && in_collseq <= cset->range_ends[i])
3753 match_len = elem_len;
3754 goto check_node_accept_bytes_match;
3757 /* match with equivalence_class? */
3758 if (cset->nequiv_classes)
3760 const unsigned char *cp = pin;
3761 table = (const int32_t *)
3762 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3763 weights = (const unsigned char *)
3764 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3765 extra = (const unsigned char *)
3766 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3767 indirect = (const int32_t *)
3768 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3769 idx = findidx (&cp);
3770 if (idx > 0)
3771 for (i = 0; i < cset->nequiv_classes; ++i)
3773 int32_t equiv_class_idx = cset->equiv_classes[i];
3774 size_t weight_len = weights[idx];
3775 if (weight_len == weights[equiv_class_idx])
3777 int cnt = 0;
3778 while (cnt <= weight_len
3779 && (weights[equiv_class_idx + 1 + cnt]
3780 == weights[idx + 1 + cnt]))
3781 ++cnt;
3782 if (cnt > weight_len)
3784 match_len = elem_len;
3785 goto check_node_accept_bytes_match;
3791 else
3792 # endif /* _LIBC */
3794 /* match with range expression? */
3795 #if __GNUC__ >= 2
3796 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3797 #else
3798 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3799 cmp_buf[2] = wc;
3800 #endif
3801 for (i = 0; i < cset->nranges; ++i)
3803 cmp_buf[0] = cset->range_starts[i];
3804 cmp_buf[4] = cset->range_ends[i];
3805 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3806 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3808 match_len = char_len;
3809 goto check_node_accept_bytes_match;
3813 check_node_accept_bytes_match:
3814 if (!cset->non_match)
3815 return match_len;
3816 else
3818 if (match_len > 0)
3819 return 0;
3820 else
3821 return (elem_len > char_len) ? elem_len : char_len;
3824 return 0;
3827 # ifdef _LIBC
3828 static unsigned int
3829 find_collation_sequence_value (mbs, mbs_len)
3830 const unsigned char *mbs;
3831 size_t mbs_len;
3833 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3834 if (nrules == 0)
3836 if (mbs_len == 1)
3838 /* No valid character. Match it as a single byte character. */
3839 const unsigned char *collseq = (const unsigned char *)
3840 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3841 return collseq[mbs[0]];
3843 return UINT_MAX;
3845 else
3847 int32_t idx;
3848 const unsigned char *extra = (const unsigned char *)
3849 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3851 for (idx = 0; ;)
3853 int mbs_cnt, found = 0;
3854 int32_t elem_mbs_len;
3855 /* Skip the name of collating element name. */
3856 idx = idx + extra[idx] + 1;
3857 elem_mbs_len = extra[idx++];
3858 if (mbs_len == elem_mbs_len)
3860 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3861 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3862 break;
3863 if (mbs_cnt == elem_mbs_len)
3864 /* Found the entry. */
3865 found = 1;
3867 /* Skip the byte sequence of the collating element. */
3868 idx += elem_mbs_len;
3869 /* Adjust for the alignment. */
3870 idx = (idx + 3) & ~3;
3871 /* Skip the collation sequence value. */
3872 idx += sizeof (uint32_t);
3873 /* Skip the wide char sequence of the collating element. */
3874 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3875 /* If we found the entry, return the sequence value. */
3876 if (found)
3877 return *(uint32_t *) (extra + idx);
3878 /* Skip the collation sequence value. */
3879 idx += sizeof (uint32_t);
3883 # endif /* _LIBC */
3884 #endif /* RE_ENABLE_I18N */
3886 /* Check whether the node accepts the byte which is IDX-th
3887 byte of the INPUT. */
3889 static int
3890 check_node_accept (mctx, node, idx)
3891 const re_match_context_t *mctx;
3892 const re_token_t *node;
3893 int idx;
3895 re_dfa_t *const dfa = mctx->dfa;
3896 unsigned char ch;
3897 if (node->constraint)
3899 /* The node has constraints. Check whether the current context
3900 satisfies the constraints. */
3901 unsigned int context = re_string_context_at (&mctx->input, idx,
3902 mctx->eflags);
3903 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3904 return 0;
3906 ch = re_string_byte_at (&mctx->input, idx);
3907 switch (node->type)
3909 case CHARACTER:
3910 return node->opr.c == ch;
3911 case SIMPLE_BRACKET:
3912 return bitset_contain (node->opr.sbcset, ch);
3913 #ifdef RE_ENABLE_I18N
3914 case OP_UTF8_PERIOD:
3915 if (ch >= 0x80)
3916 return 0;
3917 /* FALLTHROUGH */
3918 #endif
3919 case OP_PERIOD:
3920 return !((ch == '\n' && !(dfa->syntax & RE_DOT_NEWLINE))
3921 || (ch == '\0' && (dfa->syntax & RE_DOT_NOT_NULL)));
3922 default:
3923 return 0;
3927 /* Extend the buffers, if the buffers have run out. */
3929 static reg_errcode_t
3930 extend_buffers (mctx)
3931 re_match_context_t *mctx;
3933 reg_errcode_t ret;
3934 re_string_t *pstr = &mctx->input;
3936 /* Double the lengthes of the buffers. */
3937 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3938 if (BE (ret != REG_NOERROR, 0))
3939 return ret;
3941 if (mctx->state_log != NULL)
3943 /* And double the length of state_log. */
3944 /* XXX We have no indication of the size of this buffer. If this
3945 allocation fail we have no indication that the state_log array
3946 does not have the right size. */
3947 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3948 pstr->bufs_len + 1);
3949 if (BE (new_array == NULL, 0))
3950 return REG_ESPACE;
3951 mctx->state_log = new_array;
3954 /* Then reconstruct the buffers. */
3955 if (pstr->icase)
3957 #ifdef RE_ENABLE_I18N
3958 if (pstr->mb_cur_max > 1)
3960 ret = build_wcs_upper_buffer (pstr);
3961 if (BE (ret != REG_NOERROR, 0))
3962 return ret;
3964 else
3965 #endif /* RE_ENABLE_I18N */
3966 build_upper_buffer (pstr);
3968 else
3970 #ifdef RE_ENABLE_I18N
3971 if (pstr->mb_cur_max > 1)
3972 build_wcs_buffer (pstr);
3973 else
3974 #endif /* RE_ENABLE_I18N */
3976 if (pstr->trans != NULL)
3977 re_string_translate_buffer (pstr);
3980 return REG_NOERROR;
3984 /* Functions for matching context. */
3986 /* Initialize MCTX. */
3988 static reg_errcode_t
3989 match_ctx_init (mctx, eflags, n)
3990 re_match_context_t *mctx;
3991 int eflags, n;
3993 mctx->eflags = eflags;
3994 mctx->match_last = -1;
3995 if (n > 0)
3997 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3998 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
3999 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4000 return REG_ESPACE;
4002 /* Already zero-ed by the caller.
4003 else
4004 mctx->bkref_ents = NULL;
4005 mctx->nbkref_ents = 0;
4006 mctx->nsub_tops = 0; */
4007 mctx->abkref_ents = n;
4008 mctx->max_mb_elem_len = 1;
4009 mctx->asub_tops = n;
4010 return REG_NOERROR;
4013 /* Clean the entries which depend on the current input in MCTX.
4014 This function must be invoked when the matcher changes the start index
4015 of the input, or changes the input string. */
4017 static void
4018 match_ctx_clean (mctx)
4019 re_match_context_t *mctx;
4021 match_ctx_free_subtops (mctx);
4022 mctx->nsub_tops = 0;
4023 mctx->nbkref_ents = 0;
4026 /* Free all the memory associated with MCTX. */
4028 static void
4029 match_ctx_free (mctx)
4030 re_match_context_t *mctx;
4032 match_ctx_free_subtops (mctx);
4033 re_free (mctx->sub_tops);
4034 re_free (mctx->bkref_ents);
4037 /* Free all the memory associated with MCTX->SUB_TOPS. */
4039 static void
4040 match_ctx_free_subtops (mctx)
4041 re_match_context_t *mctx;
4043 int st_idx;
4044 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4046 int sl_idx;
4047 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4048 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4050 re_sub_match_last_t *last = top->lasts[sl_idx];
4051 re_free (last->path.array);
4052 re_free (last);
4054 re_free (top->lasts);
4055 if (top->path)
4057 re_free (top->path->array);
4058 re_free (top->path);
4060 free (top);
4064 /* Add a new backreference entry to MCTX.
4065 Note that we assume that caller never call this function with duplicate
4066 entry, and call with STR_IDX which isn't smaller than any existing entry.
4069 static reg_errcode_t
4070 match_ctx_add_entry (mctx, node, str_idx, from, to)
4071 re_match_context_t *mctx;
4072 int node, str_idx, from, to;
4074 if (mctx->nbkref_ents >= mctx->abkref_ents)
4076 struct re_backref_cache_entry* new_entry;
4077 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4078 mctx->abkref_ents * 2);
4079 if (BE (new_entry == NULL, 0))
4081 re_free (mctx->bkref_ents);
4082 return REG_ESPACE;
4084 mctx->bkref_ents = new_entry;
4085 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4086 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4087 mctx->abkref_ents *= 2;
4089 mctx->bkref_ents[mctx->nbkref_ents].node = node;
4090 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4091 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4092 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4093 mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
4094 if (mctx->max_mb_elem_len < to - from)
4095 mctx->max_mb_elem_len = to - from;
4096 return REG_NOERROR;
4099 /* Search for the first entry which has the same str_idx.
4100 Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4102 static int
4103 search_cur_bkref_entry (mctx, str_idx)
4104 re_match_context_t *mctx;
4105 int str_idx;
4107 int left, right, mid;
4108 right = mctx->nbkref_ents;
4109 for (left = 0; left < right;)
4111 mid = (left + right) / 2;
4112 if (mctx->bkref_ents[mid].str_idx < str_idx)
4113 left = mid + 1;
4114 else
4115 right = mid;
4117 return left;
4120 static void
4121 match_ctx_clear_flag (mctx)
4122 re_match_context_t *mctx;
4124 int i;
4125 for (i = 0; i < mctx->nbkref_ents; ++i)
4126 mctx->bkref_ents[i].flag = 0;
4129 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4130 at STR_IDX. */
4132 static reg_errcode_t
4133 match_ctx_add_subtop (mctx, node, str_idx)
4134 re_match_context_t *mctx;
4135 int node, str_idx;
4137 #ifdef DEBUG
4138 assert (mctx->sub_tops != NULL);
4139 assert (mctx->asub_tops > 0);
4140 #endif
4141 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4143 int new_asub_tops = mctx->asub_tops * 2;
4144 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4145 re_sub_match_top_t *,
4146 new_asub_tops);
4147 if (BE (new_array == NULL, 0))
4148 return REG_ESPACE;
4149 mctx->sub_tops = new_array;
4150 mctx->asub_tops = new_asub_tops;
4152 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4153 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4154 return REG_ESPACE;
4155 mctx->sub_tops[mctx->nsub_tops]->node = node;
4156 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4157 return REG_NOERROR;
4160 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4161 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4163 static re_sub_match_last_t *
4164 match_ctx_add_sublast (subtop, node, str_idx)
4165 re_sub_match_top_t *subtop;
4166 int node, str_idx;
4168 re_sub_match_last_t *new_entry;
4169 if (BE (subtop->nlasts == subtop->alasts, 0))
4171 int new_alasts = 2 * subtop->alasts + 1;
4172 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4173 re_sub_match_last_t *,
4174 new_alasts);
4175 if (BE (new_array == NULL, 0))
4176 return NULL;
4177 subtop->lasts = new_array;
4178 subtop->alasts = new_alasts;
4180 new_entry = calloc (1, sizeof (re_sub_match_last_t));
4181 if (BE (new_entry != NULL, 1))
4183 subtop->lasts[subtop->nlasts] = new_entry;
4184 new_entry->node = node;
4185 new_entry->str_idx = str_idx;
4186 ++subtop->nlasts;
4188 return new_entry;
4191 static void
4192 sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
4193 check_subexp)
4194 re_sift_context_t *sctx;
4195 re_dfastate_t **sifted_sts, **limited_sts;
4196 int last_node, last_str_idx, check_subexp;
4198 sctx->sifted_states = sifted_sts;
4199 sctx->limited_states = limited_sts;
4200 sctx->last_node = last_node;
4201 sctx->last_str_idx = last_str_idx;
4202 sctx->check_subexp = check_subexp;
4203 sctx->cur_bkref = -1;
4204 sctx->cls_subexp_idx = -1;
4205 re_node_set_init_empty (&sctx->limits);