posix/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  22                                      int n) internal_function;
  23 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
  24 static void match_ctx_free (re_match_context_t *cache) internal_function;
  25 static void match_ctx_free_subtops (re_match_context_t *mctx)
  26      internal_function;
  27 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
  28                                           int str_idx, int from, int to)
  29      internal_function;
  30 static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
  31      internal_function;
  32 static void match_ctx_clear_flag (re_match_context_t *mctx) internal_function;
  33 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
  34                                            int str_idx) internal_function;
  35 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  36                                                    int node, int str_idx)
  37      internal_function;
  38 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  39                            re_dfastate_t **limited_sts, int last_node,
  40                            int last_str_idx, int check_subexp)
  41      internal_function;
  42 static reg_errcode_t re_search_internal (const regex_t *preg,
  43                                          const char *string, int length,
  44                                          int start, int range, int stop,
  45                                          size_t nmatch, regmatch_t pmatch[],
  46                                          int eflags) internal_function;
  47 static int re_search_2_stub (struct re_pattern_buffer *bufp,
  48                              const char *string1, int length1,
  49                              const char *string2, int length2,
  50                              int start, int range, struct re_registers *regs,
  51                              int stop, int ret_len) internal_function;
  52 static int re_search_stub (struct re_pattern_buffer *bufp,
  53                            const char *string, int length, int start,
  54                            int range, int stop, struct re_registers *regs,
  55                            int ret_len) internal_function;
  56 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  57                               int nregs, int regs_allocated) internal_function;
  58 static inline re_dfastate_t *acquire_init_state_context
  59      (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
  60      __attribute ((always_inline)) internal_function;
  61 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
  62      internal_function;
  63 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
  64                            int *p_match_first)
  65      internal_function;
  66 static int check_halt_node_context (const re_dfa_t *dfa, int node,
  67                                     unsigned int context) internal_function;
  68 static int check_halt_state_context (const re_match_context_t *mctx,
  69                                      const re_dfastate_t *state, int idx)
  70      internal_function;
  71 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
  72                          regmatch_t *prev_idx_match, int cur_node,
  73                          int cur_idx, int nmatch) internal_function;
  74 static int proceed_next_node (const re_match_context_t *mctx,
  75                               int nregs, regmatch_t *regs,
  76                               int *pidx, int node, re_node_set *eps_via_nodes,
  77                               struct re_fail_stack_t *fs) internal_function;
  78 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  79                                       int str_idx, int *dests, int nregs,
  80                                       regmatch_t *regs,
  81                                       re_node_set *eps_via_nodes) internal_function;
  82 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
  83                            regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
  84 static reg_errcode_t set_regs (const regex_t *preg,
  85                                const re_match_context_t *mctx,
  86                                size_t nmatch, regmatch_t *pmatch,
  87                                int fl_backtrack) internal_function;
  88 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
  89
  90 #ifdef RE_ENABLE_I18N
  91 static int sift_states_iter_mb (const re_match_context_t *mctx,
  92                                 re_sift_context_t *sctx,
  93                                 int node_idx, int str_idx, int max_str_idx) internal_function;
  94 #endif /* RE_ENABLE_I18N */
  95 static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
  96                                            re_sift_context_t *sctx) internal_function;
  97 static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
  98                                               re_sift_context_t *sctx,
  99                                               int str_idx,
 100                                               re_node_set *dest_nodes) internal_function;
 101 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
 102                                             re_node_set *dest_nodes,
 103                                             const re_node_set *candidates) internal_function;
 104 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
 105                                             re_node_set *dest_nodes,
 106                                             const re_node_set *and_nodes) internal_function;
 107 static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
 108                              int dst_node, int dst_idx, int src_node,
 109                              int src_idx) internal_function;
 110 static int check_dst_limits_calc_pos (re_match_context_t *mctx,
 111                                       int limit, re_node_set *eclosures,
 112                                       int subexp_idx, int node, int str_idx) internal_function;
 113 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
 114                                           re_node_set *dest_nodes,
 115                                           const re_node_set *candidates,
 116                                           re_node_set *limits,
 117                                           struct re_backref_cache_entry *bkref_ents,
 118                                           int str_idx) internal_function;
 119 static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
 120                                         re_sift_context_t *sctx,
 121                                         int str_idx, re_node_set *dest_nodes) internal_function;
 122 static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
 123                                                 int next_state_log_idx) internal_function;
 124 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
 125                                         re_dfastate_t **src, int num) internal_function;
 126 static re_dfastate_t *transit_state (reg_errcode_t *err,
 127                                      re_match_context_t *mctx,
 128                                      re_dfastate_t *state) internal_function;
 129 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 130                                                 re_node_set *cur_nodes,
 131                                                 int str_idx) internal_function;
 132 #if 0
 133 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 134                                         re_match_context_t *mctx,
 135                                         re_dfastate_t *pstate) internal_function;
 136 #endif
 137 #ifdef RE_ENABLE_I18N
 138 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 139                                        re_dfastate_t *pstate) internal_function;
 140 #endif /* RE_ENABLE_I18N */
 141 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 142                                           const re_node_set *nodes) internal_function;
 143 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 144                                  int bkref_node, int bkref_str_idx) internal_function;
 145 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 146                                      const re_sub_match_top_t *sub_top,
 147                                      re_sub_match_last_t *sub_last,
 148                                      int bkref_node, int bkref_str) internal_function;
 149 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 150                              int subexp_idx, int type) internal_function;
 151 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 152                                     state_array_t *path, int top_node,
 153                                     int top_str, int last_node, int last_str,
 154                                     int type) internal_function;
 155 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 156                                                    int str_idx,
 157                                                    re_node_set *cur_nodes,
 158                                                    re_node_set *next_nodes) internal_function;
 159 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
 160                                                re_node_set *cur_nodes,
 161                                                int ex_subexp, int type) internal_function;
 162 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
 163                                                    re_node_set *dst_nodes,
 164                                                    int target, int ex_subexp,
 165                                                    int type) internal_function;
 166 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 167                                          re_node_set *cur_nodes, int cur_str,
 168                                          int last_str, int subexp_num,
 169                                          int type) internal_function;
 170 static re_dfastate_t **build_trtable (re_dfa_t *dfa,
 171                                       re_dfastate_t *state) internal_function;
 172 #ifdef RE_ENABLE_I18N
 173 static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
 174                                     const re_string_t *input, int idx) internal_function;
 175 # ifdef _LIBC
 176 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 177                                                    size_t name_len) internal_function;
 178 # endif /* _LIBC */
 179 #endif /* RE_ENABLE_I18N */
 180 static int group_nodes_into_DFAstates (re_dfa_t *dfa,
 181                                        const re_dfastate_t *state,
 182                                        re_node_set *states_node,
 183                                        bitset *states_ch) internal_function;
 184 static int check_node_accept (const re_match_context_t *mctx,
 185                               const re_token_t *node, int idx) internal_function;
 186 static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
 187 \f
 188 /* Entry point for POSIX code.  */
 189
 190 /* regexec searches for a given pattern, specified by PREG, in the
 191    string STRING.
 192
 193    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 194    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 195    least NMATCH elements, and we set them to the offsets of the
 196    corresponding matched substrings.
 197
 198    EFLAGS specifies `execution flags' which affect matching: if
 199    REG_NOTBOL is set, then ^ does not match at the beginning of the
 200    string; if REG_NOTEOL is set, then $ does not match at the end.
 201
 202    We return 0 if we find a match and REG_NOMATCH if not.  */
 203
 204 int
 205 regexec (preg, string, nmatch, pmatch, eflags)
 206     const regex_t *__restrict preg;
 207     const char *__restrict string;
 208     size_t nmatch;
 209     regmatch_t pmatch[];
 210     int eflags;
 211 {
 212   reg_errcode_t err;
 213   int length = strlen (string);
 214   if (preg->no_sub)
 215     err = re_search_internal (preg, string, length, 0, length, length, 0,
 216                               NULL, eflags);
 217   else
 218     err = re_search_internal (preg, string, length, 0, length, length, nmatch,
 219                               pmatch, eflags);
 220   return err != REG_NOERROR;
 221 }
 222 #ifdef _LIBC
 223 weak_alias (__regexec, regexec)
 224 #endif
 225
 226 /* Entry points for GNU code.  */
 227
 228 /* re_match, re_search, re_match_2, re_search_2
 229
 230    The former two functions operate on STRING with length LENGTH,
 231    while the later two operate on concatenation of STRING1 and STRING2
 232    with lengths LENGTH1 and LENGTH2, respectively.
 233
 234    re_match() matches the compiled pattern in BUFP against the string,
 235    starting at index START.
 236
 237    re_search() first tries matching at index START, then it tries to match
 238    starting from index START + 1, and so on.  The last start position tried
 239    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 240    way as re_match().)
 241
 242    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 243    the first STOP characters of the concatenation of the strings should be
 244    concerned.
 245
 246    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 247    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
 248    computed relative to the concatenation, not relative to the individual
 249    strings.)
 250
 251    On success, re_match* functions return the length of the match, re_search*
 252    return the position of the start of the match.  Return value -1 means no
 253    match was found and -2 indicates an internal error.  */
 254
 255 int
 256 re_match (bufp, string, length, start, regs)
 257     struct re_pattern_buffer *bufp;
 258     const char *string;
 259     int length, start;
 260     struct re_registers *regs;
 261 {
 262   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 263 }
 264 #ifdef _LIBC
 265 weak_alias (__re_match, re_match)
 266 #endif
 267
 268 int
 269 re_search (bufp, string, length, start, range, regs)
 270     struct re_pattern_buffer *bufp;
 271     const char *string;
 272     int length, start, range;
 273     struct re_registers *regs;
 274 {
 275   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
 276 }
 277 #ifdef _LIBC
 278 weak_alias (__re_search, re_search)
 279 #endif
 280
 281 int
 282 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 283     struct re_pattern_buffer *bufp;
 284     const char *string1, *string2;
 285     int length1, length2, start, stop;
 286     struct re_registers *regs;
 287 {
 288   return re_search_2_stub (bufp, string1, length1, string2, length2,
 289                            start, 0, regs, stop, 1);
 290 }
 291 #ifdef _LIBC
 292 weak_alias (__re_match_2, re_match_2)
 293 #endif
 294
 295 int
 296 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 297     struct re_pattern_buffer *bufp;
 298     const char *string1, *string2;
 299     int length1, length2, start, range, stop;
 300     struct re_registers *regs;
 301 {
 302   return re_search_2_stub (bufp, string1, length1, string2, length2,
 303                            start, range, regs, stop, 0);
 304 }
 305 #ifdef _LIBC
 306 weak_alias (__re_search_2, re_search_2)
 307 #endif
 308
 309 static int
 310 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
 311                   stop, ret_len)
 312     struct re_pattern_buffer *bufp;
 313     const char *string1, *string2;
 314     int length1, length2, start, range, stop, ret_len;
 315     struct re_registers *regs;
 316 {
 317   const char *str;
 318   int rval;
 319   int len = length1 + length2;
 320   int free_str = 0;
 321
 322   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
 323     return -2;
 324
 325   /* Concatenate the strings.  */
 326   if (length2 > 0)
 327     if (length1 > 0)
 328       {
 329         char *s = re_malloc (char, len);
 330
 331         if (BE (s == NULL, 0))
 332           return -2;
 333         memcpy (s, string1, length1);
 334         memcpy (s + length1, string2, length2);
 335         str = s;
 336         free_str = 1;
 337       }
 338     else
 339       str = string2;
 340   else
 341     str = string1;
 342
 343   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 344                          ret_len);
 345   if (free_str)
 346     re_free ((char *) str);
 347   return rval;
 348 }
 349
 350 /* The parameters have the same meaning as those of re_search.
 351    Additional parameters:
 352    If RET_LEN is nonzero the length of the match is returned (re_match style);
 353    otherwise the position of the match is returned.  */
 354
 355 static int
 356 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
 357     struct re_pattern_buffer *bufp;
 358     const char *string;
 359     int length, start, range, stop, ret_len;
 360     struct re_registers *regs;
 361 {
 362   reg_errcode_t result;
 363   regmatch_t *pmatch;
 364   int nregs, rval;
 365   int eflags = 0;
 366
 367   /* Check for out-of-range.  */
 368   if (BE (start < 0 || start > length, 0))
 369     return -1;
 370   if (BE (start + range > length, 0))
 371     range = length - start;
 372   else if (BE (start + range < 0, 0))
 373     range = -start;
 374
 375   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 376   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 377
 378   /* Compile fastmap if we haven't yet.  */
 379   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 380     re_compile_fastmap (bufp);
 381
 382   if (BE (bufp->no_sub, 0))
 383     regs = NULL;
 384
 385   /* We need at least 1 register.  */
 386   if (regs == NULL)
 387     nregs = 1;
 388   else if (BE (bufp->regs_allocated == REGS_FIXED &&
 389                regs->num_regs < bufp->re_nsub + 1, 0))
 390     {
 391       nregs = regs->num_regs;
 392       if (BE (nregs < 1, 0))
 393         {
 394           /* Nothing can be copied to regs.  */
 395           regs = NULL;
 396           nregs = 1;
 397         }
 398     }
 399   else
 400     nregs = bufp->re_nsub + 1;
 401   pmatch = re_malloc (regmatch_t, nregs);
 402   if (BE (pmatch == NULL, 0))
 403     return -2;
 404
 405   result = re_search_internal (bufp, string, length, start, range, stop,
 406                                nregs, pmatch, eflags);
 407
 408   rval = 0;
 409
 410   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 411   if (result != REG_NOERROR)
 412     rval = -1;
 413   else if (regs != NULL)
 414     {
 415       /* If caller wants register contents data back, copy them.  */
 416       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 417                                            bufp->regs_allocated);
 418       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 419         rval = -2;
 420     }
 421
 422   if (BE (rval == 0, 1))
 423     {
 424       if (ret_len)
 425         {
 426           assert (pmatch[0].rm_so == start);
 427           rval = pmatch[0].rm_eo - start;
 428         }
 429       else
 430         rval = pmatch[0].rm_so;
 431     }
 432   re_free (pmatch);
 433   return rval;
 434 }
 435
 436 static unsigned
 437 re_copy_regs (regs, pmatch, nregs, regs_allocated)
 438     struct re_registers *regs;
 439     regmatch_t *pmatch;
 440     int nregs, regs_allocated;
 441 {
 442   int rval = REGS_REALLOCATE;
 443   int i;
 444   int need_regs = nregs + 1;
 445   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 446      uses.  */
 447
 448   /* Have the register data arrays been allocated?  */
 449   if (regs_allocated == REGS_UNALLOCATED)
 450     { /* No.  So allocate them with malloc.  */
 451       regs->start = re_malloc (regoff_t, need_regs);
 452       regs->end = re_malloc (regoff_t, need_regs);
 453       if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
 454         return REGS_UNALLOCATED;
 455       regs->num_regs = need_regs;
 456     }
 457   else if (regs_allocated == REGS_REALLOCATE)
 458     { /* Yes.  If we need more elements than were already
 459          allocated, reallocate them.  If we need fewer, just
 460          leave it alone.  */
 461       if (BE (need_regs > regs->num_regs, 0))
 462         {
 463           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 464           regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
 465           if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
 466             return REGS_UNALLOCATED;
 467           regs->start = new_start;
 468           regs->end = new_end;
 469           regs->num_regs = need_regs;
 470         }
 471     }
 472   else
 473     {
 474       assert (regs_allocated == REGS_FIXED);
 475       /* This function may not be called with REGS_FIXED and nregs too big.  */
 476       assert (regs->num_regs >= nregs);
 477       rval = REGS_FIXED;
 478     }
 479
 480   /* Copy the regs.  */
 481   for (i = 0; i < nregs; ++i)
 482     {
 483       regs->start[i] = pmatch[i].rm_so;
 484       regs->end[i] = pmatch[i].rm_eo;
 485     }
 486   for ( ; i < regs->num_regs; ++i)
 487     regs->start[i] = regs->end[i] = -1;
 488
 489   return rval;
 490 }
 491
 492 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 493    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 494    this memory for recording register information.  STARTS and ENDS
 495    must be allocated using the malloc library routine, and must each
 496    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 497
 498    If NUM_REGS == 0, then subsequent matches should allocate their own
 499    register data.
 500
 501    Unless this function is called, the first search or match using
 502    PATTERN_BUFFER will allocate its own register data, without
 503    freeing the old data.  */
 504
 505 void
 506 re_set_registers (bufp, regs, num_regs, starts, ends)
 507     struct re_pattern_buffer *bufp;
 508     struct re_registers *regs;
 509     unsigned num_regs;
 510     regoff_t *starts, *ends;
 511 {
 512   if (num_regs)
 513     {
 514       bufp->regs_allocated = REGS_REALLOCATE;
 515       regs->num_regs = num_regs;
 516       regs->start = starts;
 517       regs->end = ends;
 518     }
 519   else
 520     {
 521       bufp->regs_allocated = REGS_UNALLOCATED;
 522       regs->num_regs = 0;
 523       regs->start = regs->end = (regoff_t *) 0;
 524     }
 525 }
 526 #ifdef _LIBC
 527 weak_alias (__re_set_registers, re_set_registers)
 528 #endif
 529 \f
 530 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 531    them unless specifically requested.  */
 532
 533 #if defined _REGEX_RE_COMP || defined _LIBC
 534 int
 535 # ifdef _LIBC
 536 weak_function
 537 # endif
 538 re_exec (s)
 539      const char *s;
 540 {
 541   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 542 }
 543 #endif /* _REGEX_RE_COMP */
 544 \f
 545 static re_node_set empty_set;
 546
 547 /* Internal entry point.  */
 548
 549 /* Searches for a compiled pattern PREG in the string STRING, whose
 550    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 551    mingings with regexec.  START, and RANGE have the same meanings
 552    with re_search.
 553    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 554    otherwise return the error code.
 555    Note: We assume front end functions already check ranges.
 556    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 557
 558 static reg_errcode_t
 559 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
 560                     eflags)
 561     const regex_t *preg;
 562     const char *string;
 563     int length, start, range, stop, eflags;
 564     size_t nmatch;
 565     regmatch_t pmatch[];
 566 {
 567   reg_errcode_t err;
 568   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 569   int left_lim, right_lim, incr;
 570   int fl_longest_match, match_first, match_last = -1;
 571   int fast_translate, sb;
 572 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 573   re_match_context_t mctx = { .dfa = dfa };
 574 #else
 575   re_match_context_t mctx;
 576 #endif
 577   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 578                     && range && !preg->can_be_null) ? preg->fastmap : NULL);
 579
 580 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 581   memset (&mctx, '\0', sizeof (re_match_context_t));
 582   mctx.dfa = dfa;
 583 #endif
 584
 585   /* Check if the DFA haven't been compiled.  */
 586   if (BE (preg->used == 0 || dfa->init_state == NULL
 587           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 588           || dfa->init_state_begbuf == NULL, 0))
 589     return REG_NOMATCH;
 590
 591 #ifdef DEBUG
 592   /* We assume front-end functions already check them.  */
 593   assert (start + range >= 0 && start + range <= length);
 594 #endif
 595
 596   /* If initial states with non-begbuf contexts have no elements,
 597      the regex must be anchored.  If preg->newline_anchor is set,
 598      we'll never use init_state_nl, so do not check it.  */
 599   if (dfa->init_state->nodes.nelem == 0
 600       && dfa->init_state_word->nodes.nelem == 0
 601       && (dfa->init_state_nl->nodes.nelem == 0
 602           || !preg->newline_anchor))
 603     {
 604       if (start != 0 && start + range != 0)
 605         return REG_NOMATCH;
 606       start = range = 0;
 607     }
 608
 609   re_node_set_init_empty (&empty_set);
 610
 611   /* We must check the longest matching, if nmatch > 0.  */
 612   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 613
 614   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 615                             preg->translate, preg->syntax & RE_ICASE, dfa);
 616   if (BE (err != REG_NOERROR, 0))
 617     goto free_return;
 618   mctx.input.stop = stop;
 619   mctx.input.raw_stop = stop;
 620   mctx.input.newline_anchor = preg->newline_anchor;
 621
 622   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 623   if (BE (err != REG_NOERROR, 0))
 624     goto free_return;
 625
 626   /* We will log all the DFA states through which the dfa pass,
 627      if nmatch > 1, or this dfa has "multibyte node", which is a
 628      back-reference or a node which can accept multibyte character or
 629      multi character collating element.  */
 630   if (nmatch > 1 || dfa->has_mb_node)
 631     {
 632       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 633       if (BE (mctx.state_log == NULL, 0))
 634         {
 635           err = REG_ESPACE;
 636           goto free_return;
 637         }
 638     }
 639   else
 640     mctx.state_log = NULL;
 641
 642   match_first = start;
 643   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 644                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 645
 646   /* Check incrementally whether of not the input string match.  */
 647   incr = (range < 0) ? -1 : 1;
 648   left_lim = (range < 0) ? start + range : start;
 649   right_lim = (range < 0) ? start : start + range;
 650   sb = dfa->mb_cur_max == 1;
 651   fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
 652
 653   for (;;)
 654     {
 655       /* At first get the current byte from input string.  */
 656       if (fastmap)
 657         {
 658           if (BE (fast_translate, 1))
 659             {
 660               unsigned RE_TRANSLATE_TYPE t
 661                 = (unsigned RE_TRANSLATE_TYPE) preg->translate;
 662               if (BE (range >= 0, 1))
 663                 {
 664                   if (BE (t != NULL, 0))
 665                     {
 666                       while (BE (match_first < right_lim, 1)
 667                              && !fastmap[t[(unsigned char) string[match_first]]])
 668                         ++match_first;
 669                     }
 670                   else
 671                     {
 672                       while (BE (match_first < right_lim, 1)
 673                              && !fastmap[(unsigned char) string[match_first]])
 674                         ++match_first;
 675                     }
 676                   if (BE (match_first == right_lim, 0))
 677                     {
 678                       int ch = match_first >= length
 679                                ? 0 : (unsigned char) string[match_first];
 680                       if (!fastmap[t ? t[ch] : ch])
 681                         break;
 682                     }
 683                 }
 684               else
 685                 {
 686                   while (match_first >= left_lim)
 687                     {
 688                       int ch = match_first >= length
 689                                ? 0 : (unsigned char) string[match_first];
 690                       if (fastmap[t ? t[ch] : ch])
 691                         break;
 692                       --match_first;
 693                     }
 694                   if (match_first < left_lim)
 695                     break;
 696                 }
 697             }
 698           else
 699             {
 700               int ch;
 701
 702               do
 703                 {
 704                   /* In this case, we can't determine easily the current byte,
 705                      since it might be a component byte of a multibyte
 706                      character.  Then we use the constructed buffer
 707                      instead.  */
 708                   /* If MATCH_FIRST is out of the valid range, reconstruct the
 709                      buffers.  */
 710                   if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len
 711                       <= match_first
 712                       || match_first < mctx.input.raw_mbs_idx)
 713                     {
 714                       err = re_string_reconstruct (&mctx.input, match_first,
 715                                                    eflags);
 716                       if (BE (err != REG_NOERROR, 0))
 717                         goto free_return;
 718                     }
 719                   /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 720                      Note that MATCH_FIRST must not be smaller than 0.  */
 721                   ch = ((match_first >= length) ? 0
 722                        : re_string_byte_at (&mctx.input,
 723                                             match_first
 724                                             - mctx.input.raw_mbs_idx));
 725                   if (fastmap[ch])
 726                     break;
 727                   match_first += incr;
 728                 }
 729               while (match_first >= left_lim && match_first <= right_lim);
 730               if (! fastmap[ch])
 731                 break;
 732             }
 733         }
 734
 735       /* Reconstruct the buffers so that the matcher can assume that
 736          the matching starts from the beginning of the buffer.  */
 737       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 738       if (BE (err != REG_NOERROR, 0))
 739         goto free_return;
 740 #ifdef RE_ENABLE_I18N
 741      /* Eliminate it when it is a component of a multibyte character
 742          and isn't the head of a multibyte character.  */
 743       if (sb || re_string_first_byte (&mctx.input, 0))
 744 #endif
 745         {
 746           /* It seems to be appropriate one, then use the matcher.  */
 747           /* We assume that the matching starts from 0.  */
 748           mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 749           match_last = check_matching (&mctx, fl_longest_match,
 750                                        range >= 0 ? &match_first : NULL);
 751           if (match_last != -1)
 752             {
 753               if (BE (match_last == -2, 0))
 754                 {
 755                   err = REG_ESPACE;
 756                   goto free_return;
 757                 }
 758               else
 759                 {
 760                   mctx.match_last = match_last;
 761                   if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 762                     {
 763                       re_dfastate_t *pstate = mctx.state_log[match_last];
 764                       mctx.last_node = check_halt_state_context (&mctx, pstate,
 765                                                                  match_last);
 766                     }
 767                   if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 768                       || dfa->nbackref)
 769                     {
 770                       err = prune_impossible_nodes (&mctx);
 771                       if (err == REG_NOERROR)
 772                         break;
 773                       if (BE (err != REG_NOMATCH, 0))
 774                         goto free_return;
 775                       match_last = -1;
 776                     }
 777                   else
 778                     break; /* We found a match.  */
 779                 }
 780             }
 781           match_ctx_clean (&mctx);
 782         }
 783       /* Update counter.  */
 784       match_first += incr;
 785       if (match_first < left_lim || right_lim < match_first)
 786         break;
 787     }
 788
 789   /* Set pmatch[] if we need.  */
 790   if (match_last != -1 && nmatch > 0)
 791     {
 792       int reg_idx;
 793
 794       /* Initialize registers.  */
 795       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 796         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 797
 798       /* Set the points where matching start/end.  */
 799       pmatch[0].rm_so = 0;
 800       pmatch[0].rm_eo = mctx.match_last;
 801
 802       if (!preg->no_sub && nmatch > 1)
 803         {
 804           err = set_regs (preg, &mctx, nmatch, pmatch,
 805                           dfa->has_plural_match && dfa->nbackref > 0);
 806           if (BE (err != REG_NOERROR, 0))
 807             goto free_return;
 808         }
 809
 810       /* At last, add the offset to the each registers, since we slided
 811          the buffers so that we could assume that the matching starts
 812          from 0.  */
 813       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 814         if (pmatch[reg_idx].rm_so != -1)
 815           {
 816 #ifdef RE_ENABLE_I18N
 817             if (BE (mctx.input.offsets_needed != 0, 0))
 818               {
 819                 if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
 820                   pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
 821                 else
 822                   pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
 823                 if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
 824                   pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
 825                 else
 826                   pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
 827               }
 828 #else
 829             assert (mctx.input.offsets_needed == 0);
 830 #endif
 831             pmatch[reg_idx].rm_so += match_first;
 832             pmatch[reg_idx].rm_eo += match_first;
 833           }
 834     }
 835   err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
 836  free_return:
 837   re_free (mctx.state_log);
 838   if (dfa->nbackref)
 839     match_ctx_free (&mctx);
 840   re_string_destruct (&mctx.input);
 841   return err;
 842 }
 843
 844 static reg_errcode_t
 845 prune_impossible_nodes (mctx)
 846      re_match_context_t *mctx;
 847 {
 848   re_dfa_t *const dfa = mctx->dfa;
 849   int halt_node, match_last;
 850   reg_errcode_t ret;
 851   re_dfastate_t **sifted_states;
 852   re_dfastate_t **lim_states = NULL;
 853   re_sift_context_t sctx;
 854 #ifdef DEBUG
 855   assert (mctx->state_log != NULL);
 856 #endif
 857   match_last = mctx->match_last;
 858   halt_node = mctx->last_node;
 859   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 860   if (BE (sifted_states == NULL, 0))
 861     {
 862       ret = REG_ESPACE;
 863       goto free_return;
 864     }
 865   if (dfa->nbackref)
 866     {
 867       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 868       if (BE (lim_states == NULL, 0))
 869         {
 870           ret = REG_ESPACE;
 871           goto free_return;
 872         }
 873       while (1)
 874         {
 875           memset (lim_states, '\0',
 876                   sizeof (re_dfastate_t *) * (match_last + 1));
 877           match_ctx_clear_flag (mctx);
 878           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 879                          match_last, 0);
 880           ret = sift_states_backward (mctx, &sctx);
 881           re_node_set_free (&sctx.limits);
 882           if (BE (ret != REG_NOERROR, 0))
 883               goto free_return;
 884           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 885             break;
 886           do
 887             {
 888               --match_last;
 889               if (match_last < 0)
 890                 {
 891                   ret = REG_NOMATCH;
 892                   goto free_return;
 893                 }
 894             } while (mctx->state_log[match_last] == NULL
 895                      || !mctx->state_log[match_last]->halt);
 896           halt_node = check_halt_state_context (mctx,
 897                                                 mctx->state_log[match_last],
 898                                                 match_last);
 899         }
 900       ret = merge_state_array (dfa, sifted_states, lim_states,
 901                                match_last + 1);
 902       re_free (lim_states);
 903       lim_states = NULL;
 904       if (BE (ret != REG_NOERROR, 0))
 905         goto free_return;
 906     }
 907   else
 908     {
 909       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 910                      match_last, 0);
 911       ret = sift_states_backward (mctx, &sctx);
 912       re_node_set_free (&sctx.limits);
 913       if (BE (ret != REG_NOERROR, 0))
 914         goto free_return;
 915     }
 916   re_free (mctx->state_log);
 917   mctx->state_log = sifted_states;
 918   sifted_states = NULL;
 919   mctx->last_node = halt_node;
 920   mctx->match_last = match_last;
 921   ret = REG_NOERROR;
 922  free_return:
 923   re_free (sifted_states);
 924   re_free (lim_states);
 925   return ret;
 926 }
 927
 928 /* Acquire an initial state and return it.
 929    We must select appropriate initial state depending on the context,
 930    since initial states may have constraints like "\<", "^", etc..  */
 931
 932 static inline re_dfastate_t *
 933 acquire_init_state_context (err, mctx, idx)
 934      reg_errcode_t *err;
 935      const re_match_context_t *mctx;
 936      int idx;
 937 {
 938   re_dfa_t *const dfa = mctx->dfa;
 939   *err = REG_NOERROR;
 940   if (dfa->init_state->has_constraint)
 941     {
 942       unsigned int context;
 943       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
 944       if (IS_WORD_CONTEXT (context))
 945         return dfa->init_state_word;
 946       else if (IS_ORDINARY_CONTEXT (context))
 947         return dfa->init_state;
 948       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 949         return dfa->init_state_begbuf;
 950       else if (IS_NEWLINE_CONTEXT (context))
 951         return dfa->init_state_nl;
 952       else if (IS_BEGBUF_CONTEXT (context))
 953         {
 954           /* It is relatively rare case, then calculate on demand.  */
 955           return  re_acquire_state_context (err, dfa,
 956                                             dfa->init_state->entrance_nodes,
 957                                             context);
 958         }
 959       else
 960         /* Must not happen?  */
 961         return dfa->init_state;
 962     }
 963   else
 964     return dfa->init_state;
 965 }
 966
 967 /* Check whether the regular expression match input string INPUT or not,
 968    and return the index where the matching end, return -1 if not match,
 969    or return -2 in case of an error.
 970    FL_LONGEST_MATCH means we want the POSIX longest matching.
 971    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
 972    next place where we may want to try matching.
 973    Note that the matcher assume that the maching starts from the current
 974    index of the buffer.  */
 975
 976 static int
 977 check_matching (mctx, fl_longest_match, p_match_first)
 978     re_match_context_t *mctx;
 979     int fl_longest_match;
 980     int *p_match_first;
 981 {
 982   re_dfa_t *const dfa = mctx->dfa;
 983   reg_errcode_t err;
 984   int match = 0;
 985   int match_last = -1;
 986   int cur_str_idx = re_string_cur_idx (&mctx->input);
 987   re_dfastate_t *cur_state;
 988   int at_init_state = p_match_first != NULL, skipped = 0;
 989
 990   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
 991   /* An initial state must not be NULL(invalid state).  */
 992   if (BE (cur_state == NULL, 0))
 993     return -2;
 994   if (mctx->state_log != NULL)
 995     mctx->state_log[cur_str_idx] = cur_state;
 996
 997   /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
 998      later.  E.g. Processing back references.  */
 999   if (BE (dfa->nbackref, 0))
1000     {
1001       at_init_state = 0;
1002       err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1003       if (BE (err != REG_NOERROR, 0))
1004         return err;
1005
1006       if (cur_state->has_backref)
1007         {
1008           err = transit_state_bkref (mctx, &cur_state->nodes);
1009           if (BE (err != REG_NOERROR, 0))
1010             return err;
1011         }
1012     }
1013
1014   /* If the RE accepts NULL string.  */
1015   if (BE (cur_state->halt, 0))
1016     {
1017       if (!cur_state->has_constraint
1018           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1019         {
1020           if (!fl_longest_match)
1021             return cur_str_idx;
1022           else
1023             {
1024               match_last = cur_str_idx;
1025               match = 1;
1026             }
1027         }
1028     }
1029
1030   while (!re_string_eoi (&mctx->input))
1031     {
1032       re_dfastate_t *old_state = cur_state;
1033       cur_state = transit_state (&err, mctx, cur_state);
1034       if (at_init_state)
1035         {
1036           if (old_state == cur_state)
1037             skipped++;
1038           else
1039             at_init_state = 0;
1040         }
1041
1042       if (cur_state == NULL) /* Reached at the invalid state or an error.  */
1043         {
1044           cur_str_idx = re_string_cur_idx (&mctx->input);
1045           if (BE (err != REG_NOERROR, 0))
1046             return -2;
1047           if (!fl_longest_match && match)
1048             break;
1049           else
1050             {
1051               if (mctx->state_log == NULL)
1052                 break;
1053               else
1054                 {
1055                   int max = mctx->state_log_top;
1056                   for (; cur_str_idx <= max; ++cur_str_idx)
1057                     if (mctx->state_log[cur_str_idx] != NULL)
1058                       break;
1059                   if (cur_str_idx > max)
1060                     break;
1061                 }
1062             }
1063         }
1064
1065       if (cur_state != NULL && cur_state->halt)
1066         {
1067           /* Reached at a halt state.
1068              Check the halt state can satisfy the current context.  */
1069           if (!cur_state->has_constraint
1070               || check_halt_state_context (mctx, cur_state,
1071                                            re_string_cur_idx (&mctx->input)))
1072             {
1073               /* We found an appropriate halt state.  */
1074               match_last = re_string_cur_idx (&mctx->input);
1075               match = 1;
1076               if (!fl_longest_match)
1077                 break;
1078             }
1079         }
1080    }
1081
1082   if (match_last == -1 && skipped)
1083     *p_match_first += skipped;
1084
1085   return match_last;
1086 }
1087
1088 /* Check NODE match the current context.  */
1089
1090 static int check_halt_node_context (dfa, node, context)
1091     const re_dfa_t *dfa;
1092     int node;
1093     unsigned int context;
1094 {
1095   re_token_type_t type = dfa->nodes[node].type;
1096   unsigned int constraint = dfa->nodes[node].constraint;
1097   if (type != END_OF_RE)
1098     return 0;
1099   if (!constraint)
1100     return 1;
1101   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1102     return 0;
1103   return 1;
1104 }
1105
1106 /* Check the halt state STATE match the current context.
1107    Return 0 if not match, if the node, STATE has, is a halt node and
1108    match the context, return the node.  */
1109
1110 static int
1111 check_halt_state_context (mctx, state, idx)
1112     const re_match_context_t *mctx;
1113     const re_dfastate_t *state;
1114     int idx;
1115 {
1116   int i;
1117   unsigned int context;
1118 #ifdef DEBUG
1119   assert (state->halt);
1120 #endif
1121   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1122   for (i = 0; i < state->nodes.nelem; ++i)
1123     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1124       return state->nodes.elems[i];
1125   return 0;
1126 }
1127
1128 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1129    corresponding to the DFA).
1130    Return the destination node, and update EPS_VIA_NODES, return -1 in case
1131    of errors.  */
1132
1133 static int
1134 proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
1135     const re_match_context_t *mctx;
1136     regmatch_t *regs;
1137     int nregs, *pidx, node;
1138     re_node_set *eps_via_nodes;
1139     struct re_fail_stack_t *fs;
1140 {
1141   re_dfa_t *const dfa = mctx->dfa;
1142   int i, err, dest_node;
1143   dest_node = -1;
1144   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1145     {
1146       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1147       int ndest, dest_nodes[2];
1148       err = re_node_set_insert (eps_via_nodes, node);
1149       if (BE (err < 0, 0))
1150         return -2;
1151       /* Pick up valid destinations.  */
1152       for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1153         {
1154           int candidate = dfa->edests[node].elems[i];
1155           if (!re_node_set_contains (cur_nodes, candidate))
1156             continue;
1157           dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1158           dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1159           ++ndest;
1160         }
1161       if (ndest <= 1)
1162         return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1163       /* In order to avoid infinite loop like "(a*)*".  */
1164       if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1165         return dest_nodes[1];
1166       if (fs != NULL
1167           && push_fail_stack (fs, *pidx, dest_nodes, nregs, regs,
1168                               eps_via_nodes))
1169         return -2;
1170       return dest_nodes[0];
1171     }
1172   else
1173     {
1174       int naccepted = 0;
1175       re_token_type_t type = dfa->nodes[node].type;
1176
1177 #ifdef RE_ENABLE_I18N
1178       if (ACCEPT_MB_NODE (type))
1179         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1180       else
1181 #endif /* RE_ENABLE_I18N */
1182       if (type == OP_BACK_REF)
1183         {
1184           int subexp_idx = dfa->nodes[node].opr.idx;
1185           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1186           if (fs != NULL)
1187             {
1188               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1189                 return -1;
1190               else if (naccepted)
1191                 {
1192                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1193                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1194                               naccepted) != 0)
1195                     return -1;
1196                 }
1197             }
1198
1199           if (naccepted == 0)
1200             {
1201               err = re_node_set_insert (eps_via_nodes, node);
1202               if (BE (err < 0, 0))
1203                 return -2;
1204               dest_node = dfa->edests[node].elems[0];
1205               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1206                                         dest_node))
1207                 return dest_node;
1208             }
1209         }
1210
1211       if (naccepted != 0
1212           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1213         {
1214           dest_node = dfa->nexts[node];
1215           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1216           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1217                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1218                                                dest_node)))
1219             return -1;
1220           re_node_set_empty (eps_via_nodes);
1221           return dest_node;
1222         }
1223     }
1224   return -1;
1225 }
1226
1227 static reg_errcode_t
1228 push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1229      struct re_fail_stack_t *fs;
1230      int str_idx, *dests, nregs;
1231      regmatch_t *regs;
1232      re_node_set *eps_via_nodes;
1233 {
1234   reg_errcode_t err;
1235   int num = fs->num++;
1236   if (fs->num == fs->alloc)
1237     {
1238       struct re_fail_stack_ent_t *new_array;
1239       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1240                                        * fs->alloc * 2));
1241       if (new_array == NULL)
1242         return REG_ESPACE;
1243       fs->alloc *= 2;
1244       fs->stack = new_array;
1245     }
1246   fs->stack[num].idx = str_idx;
1247   fs->stack[num].node = dests[1];
1248   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1249   if (fs->stack[num].regs == NULL)
1250     return REG_ESPACE;
1251   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1252   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1253   return err;
1254 }
1255
1256 static int
1257 pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1258      struct re_fail_stack_t *fs;
1259      int *pidx, nregs;
1260      regmatch_t *regs;
1261      re_node_set *eps_via_nodes;
1262 {
1263   int num = --fs->num;
1264   assert (num >= 0);
1265   *pidx = fs->stack[num].idx;
1266   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1267   re_node_set_free (eps_via_nodes);
1268   re_free (fs->stack[num].regs);
1269   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1270   return fs->stack[num].node;
1271 }
1272
1273 /* Set the positions where the subexpressions are starts/ends to registers
1274    PMATCH.
1275    Note: We assume that pmatch[0] is already set, and
1276    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1277
1278 static reg_errcode_t
1279 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1280      const regex_t *preg;
1281      const re_match_context_t *mctx;
1282      size_t nmatch;
1283      regmatch_t *pmatch;
1284      int fl_backtrack;
1285 {
1286   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1287   int idx, cur_node, real_nmatch;
1288   re_node_set eps_via_nodes;
1289   struct re_fail_stack_t *fs;
1290   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1291   regmatch_t *prev_idx_match;
1292
1293 #ifdef DEBUG
1294   assert (nmatch > 1);
1295   assert (mctx->state_log != NULL);
1296 #endif
1297   if (fl_backtrack)
1298     {
1299       fs = &fs_body;
1300       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1301       if (fs->stack == NULL)
1302         return REG_ESPACE;
1303     }
1304   else
1305     fs = NULL;
1306
1307   cur_node = dfa->init_node;
1308   real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1309   re_node_set_init_empty (&eps_via_nodes);
1310
1311   prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
1312   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
1313
1314   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1315     {
1316       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
1317
1318       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1319         {
1320           int reg_idx;
1321           if (fs)
1322             {
1323               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1324                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1325                   break;
1326               if (reg_idx == nmatch)
1327                 {
1328                   re_node_set_free (&eps_via_nodes);
1329                   return free_fail_stack_return (fs);
1330                 }
1331               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1332                                          &eps_via_nodes);
1333             }
1334           else
1335             {
1336               re_node_set_free (&eps_via_nodes);
1337               return REG_NOERROR;
1338             }
1339         }
1340
1341       /* Proceed to next node.  */
1342       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1343                                     &eps_via_nodes, fs);
1344
1345       if (BE (cur_node < 0, 0))
1346         {
1347           if (BE (cur_node == -2, 0))
1348             {
1349               re_node_set_free (&eps_via_nodes);
1350               free_fail_stack_return (fs);
1351               return REG_ESPACE;
1352             }
1353           if (fs)
1354             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1355                                        &eps_via_nodes);
1356           else
1357             {
1358               re_node_set_free (&eps_via_nodes);
1359               return REG_NOMATCH;
1360             }
1361         }
1362     }
1363   re_node_set_free (&eps_via_nodes);
1364   return free_fail_stack_return (fs);
1365 }
1366
1367 static reg_errcode_t
1368 free_fail_stack_return (fs)
1369      struct re_fail_stack_t *fs;
1370 {
1371   if (fs)
1372     {
1373       int fs_idx;
1374       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1375         {
1376           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1377           re_free (fs->stack[fs_idx].regs);
1378         }
1379       re_free (fs->stack);
1380     }
1381   return REG_NOERROR;
1382 }
1383
1384 static void
1385 update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
1386      re_dfa_t *dfa;
1387      regmatch_t *pmatch, *prev_idx_match;
1388      int cur_node, cur_idx, nmatch;
1389 {
1390   int type = dfa->nodes[cur_node].type;
1391   if (type == OP_OPEN_SUBEXP)
1392     {
1393       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1394
1395       /* We are at the first node of this sub expression.  */
1396       if (reg_num < nmatch)
1397         {
1398           pmatch[reg_num].rm_so = cur_idx;
1399           pmatch[reg_num].rm_eo = -1;
1400         }
1401     }
1402   else if (type == OP_CLOSE_SUBEXP)
1403     {
1404       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1405       if (reg_num < nmatch)
1406         {
1407           /* We are at the last node of this sub expression.  */
1408           if (pmatch[reg_num].rm_so < cur_idx)
1409             {
1410               pmatch[reg_num].rm_eo = cur_idx;
1411               /* This is a non-empty match or we are not inside an optional
1412                  subexpression.  Accept this right away.  */
1413               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1414             }
1415           else
1416             {
1417               if (dfa->nodes[cur_node].opt_subexp
1418                   && prev_idx_match[reg_num].rm_so != -1)
1419                 /* We transited through an empty match for an optional
1420                    subexpression, like (a?)*, and this is not the subexp's
1421                    first match.  Copy back the old content of the registers
1422                    so that matches of an inner subexpression are undone as
1423                    well, like in ((a?))*.  */
1424                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1425               else
1426                 /* We completed a subexpression, but it may be part of
1427                    an optional one, so do not update PREV_IDX_MATCH.  */
1428                 pmatch[reg_num].rm_eo = cur_idx;
1429             }
1430         }
1431     }
1432 }
1433
1434 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1435    and sift the nodes in each states according to the following rules.
1436    Updated state_log will be wrote to STATE_LOG.
1437
1438    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1439      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1440         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1441         the LAST_NODE, we throw away the node `a'.
1442      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1443         string `s' and transit to `b':
1444         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1445            away the node `a'.
1446         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1447             thrown away, we throw away the node `a'.
1448      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1449         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1450            node `a'.
1451         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1452             we throw away the node `a'.  */
1453
1454 #define STATE_NODE_CONTAINS(state,node) \
1455   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1456
1457 static reg_errcode_t
1458 sift_states_backward (mctx, sctx)
1459      re_match_context_t *mctx;
1460      re_sift_context_t *sctx;
1461 {
1462   re_dfa_t *const dfa = mctx->dfa;
1463   reg_errcode_t err;
1464   int null_cnt = 0;
1465   int str_idx = sctx->last_str_idx;
1466   re_node_set cur_dest;
1467   re_node_set *cur_src; /* Points the state_log[str_idx]->nodes  */
1468
1469 #ifdef DEBUG
1470   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1471 #endif
1472   cur_src = &mctx->state_log[str_idx]->nodes;
1473
1474   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1475      transit to the last_node and the last_node itself.  */
1476   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1477   if (BE (err != REG_NOERROR, 0))
1478     return err;
1479   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1480   if (BE (err != REG_NOERROR, 0))
1481     goto free_return;
1482
1483   /* Then check each states in the state_log.  */
1484   while (str_idx > 0)
1485     {
1486       int i, ret;
1487       /* Update counters.  */
1488       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1489       if (null_cnt > mctx->max_mb_elem_len)
1490         {
1491           memset (sctx->sifted_states, '\0',
1492                   sizeof (re_dfastate_t *) * str_idx);
1493           re_node_set_free (&cur_dest);
1494           return REG_NOERROR;
1495         }
1496       re_node_set_empty (&cur_dest);
1497       --str_idx;
1498       cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1499                  : &mctx->state_log[str_idx]->nodes);
1500
1501       /* Then build the next sifted state.
1502          We build the next sifted state on `cur_dest', and update
1503          `sifted_states[str_idx]' with `cur_dest'.
1504          Note:
1505          `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1506          `cur_src' points the node_set of the old `state_log[str_idx]'.  */
1507       for (i = 0; i < cur_src->nelem; i++)
1508         {
1509           int prev_node = cur_src->elems[i];
1510           int naccepted = 0;
1511           re_token_type_t type = dfa->nodes[prev_node].type;
1512
1513           if (IS_EPSILON_NODE (type))
1514             continue;
1515 #ifdef RE_ENABLE_I18N
1516           /* If the node may accept `multi byte'.  */
1517           if (ACCEPT_MB_NODE (type))
1518             naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1519                                              str_idx, sctx->last_str_idx);
1520
1521 #endif /* RE_ENABLE_I18N */
1522           /* We don't check backreferences here.
1523              See update_cur_sifted_state().  */
1524
1525           if (!naccepted
1526               && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1527               && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1528                                       dfa->nexts[prev_node]))
1529             naccepted = 1;
1530
1531           if (naccepted == 0)
1532             continue;
1533
1534           if (sctx->limits.nelem)
1535             {
1536               int to_idx = str_idx + naccepted;
1537               if (check_dst_limits (mctx, &sctx->limits,
1538                                     dfa->nexts[prev_node], to_idx,
1539                                     prev_node, str_idx))
1540                 continue;
1541             }
1542           ret = re_node_set_insert (&cur_dest, prev_node);
1543           if (BE (ret == -1, 0))
1544             {
1545               err = REG_ESPACE;
1546               goto free_return;
1547             }
1548         }
1549
1550       /* Add all the nodes which satisfy the following conditions:
1551          - It can epsilon transit to a node in CUR_DEST.
1552          - It is in CUR_SRC.
1553          And update state_log.  */
1554       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1555       if (BE (err != REG_NOERROR, 0))
1556         goto free_return;
1557     }
1558   err = REG_NOERROR;
1559  free_return:
1560   re_node_set_free (&cur_dest);
1561   return err;
1562 }
1563
1564 /* Helper functions.  */
1565
1566 static reg_errcode_t
1567 clean_state_log_if_needed (mctx, next_state_log_idx)
1568     re_match_context_t *mctx;
1569     int next_state_log_idx;
1570 {
1571   int top = mctx->state_log_top;
1572
1573   if (next_state_log_idx >= mctx->input.bufs_len
1574       || (next_state_log_idx >= mctx->input.valid_len
1575           && mctx->input.valid_len < mctx->input.len))
1576     {
1577       reg_errcode_t err;
1578       err = extend_buffers (mctx);
1579       if (BE (err != REG_NOERROR, 0))
1580         return err;
1581     }
1582
1583   if (top < next_state_log_idx)
1584     {
1585       memset (mctx->state_log + top + 1, '\0',
1586               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1587       mctx->state_log_top = next_state_log_idx;
1588     }
1589   return REG_NOERROR;
1590 }
1591
1592 static reg_errcode_t
1593 merge_state_array (dfa, dst, src, num)
1594      re_dfa_t *dfa;
1595      re_dfastate_t **dst;
1596      re_dfastate_t **src;
1597      int num;
1598 {
1599   int st_idx;
1600   reg_errcode_t err;
1601   for (st_idx = 0; st_idx < num; ++st_idx)
1602     {
1603       if (dst[st_idx] == NULL)
1604         dst[st_idx] = src[st_idx];
1605       else if (src[st_idx] != NULL)
1606         {
1607           re_node_set merged_set;
1608           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1609                                         &src[st_idx]->nodes);
1610           if (BE (err != REG_NOERROR, 0))
1611             return err;
1612           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1613           re_node_set_free (&merged_set);
1614           if (BE (err != REG_NOERROR, 0))
1615             return err;
1616         }
1617     }
1618   return REG_NOERROR;
1619 }
1620
1621 static reg_errcode_t
1622 update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
1623      re_match_context_t *mctx;
1624      re_sift_context_t *sctx;
1625      int str_idx;
1626      re_node_set *dest_nodes;
1627 {
1628   re_dfa_t *const dfa = mctx->dfa;
1629   reg_errcode_t err;
1630   const re_node_set *candidates;
1631   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1632                 : &mctx->state_log[str_idx]->nodes);
1633
1634   /* At first, add the nodes which can epsilon transit to a node in
1635      DEST_NODE.  */
1636   if (dest_nodes->nelem)
1637     {
1638       err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1639       if (BE (err != REG_NOERROR, 0))
1640         return err;
1641     }
1642
1643   /* Then, check the limitations in the current sift_context.  */
1644   if (dest_nodes->nelem && sctx->limits.nelem)
1645     {
1646       err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1647                                  mctx->bkref_ents, str_idx);
1648       if (BE (err != REG_NOERROR, 0))
1649         return err;
1650     }
1651
1652   /* Update state_log.  */
1653   sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1654   if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1655     return err;
1656
1657   if ((mctx->state_log[str_idx] != NULL
1658        && mctx->state_log[str_idx]->has_backref))
1659     {
1660       err = sift_states_bkref (mctx, sctx, str_idx, dest_nodes);
1661       if (BE (err != REG_NOERROR, 0))
1662         return err;
1663     }
1664   return REG_NOERROR;
1665 }
1666
1667 static reg_errcode_t
1668 add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1669      re_dfa_t *dfa;
1670      re_node_set *dest_nodes;
1671      const re_node_set *candidates;
1672 {
1673   reg_errcode_t err;
1674   int src_idx;
1675   re_node_set src_copy;
1676
1677   err = re_node_set_init_copy (&src_copy, dest_nodes);
1678   if (BE (err != REG_NOERROR, 0))
1679     return err;
1680   for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1681     {
1682       err = re_node_set_add_intersect (dest_nodes, candidates,
1683                                        dfa->inveclosures
1684                                        + src_copy.elems[src_idx]);
1685       if (BE (err != REG_NOERROR, 0))
1686         {
1687           re_node_set_free (&src_copy);
1688           return err;
1689         }
1690     }
1691   re_node_set_free (&src_copy);
1692   return REG_NOERROR;
1693 }
1694
1695 static reg_errcode_t
1696 sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1697      re_dfa_t *dfa;
1698      int node;
1699      re_node_set *dest_nodes;
1700      const re_node_set *candidates;
1701 {
1702     int ecl_idx;
1703     reg_errcode_t err;
1704     re_node_set *inv_eclosure = dfa->inveclosures + node;
1705     re_node_set except_nodes;
1706     re_node_set_init_empty (&except_nodes);
1707     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1708       {
1709         int cur_node = inv_eclosure->elems[ecl_idx];
1710         if (cur_node == node)
1711           continue;
1712         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1713           {
1714             int edst1 = dfa->edests[cur_node].elems[0];
1715             int edst2 = ((dfa->edests[cur_node].nelem > 1)
1716                          ? dfa->edests[cur_node].elems[1] : -1);
1717             if ((!re_node_set_contains (inv_eclosure, edst1)
1718                  && re_node_set_contains (dest_nodes, edst1))
1719                 || (edst2 > 0
1720                     && !re_node_set_contains (inv_eclosure, edst2)
1721                     && re_node_set_contains (dest_nodes, edst2)))
1722               {
1723                 err = re_node_set_add_intersect (&except_nodes, candidates,
1724                                                  dfa->inveclosures + cur_node);
1725                 if (BE (err != REG_NOERROR, 0))
1726                   {
1727                     re_node_set_free (&except_nodes);
1728                     return err;
1729                   }
1730               }
1731           }
1732       }
1733     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1734       {
1735         int cur_node = inv_eclosure->elems[ecl_idx];
1736         if (!re_node_set_contains (&except_nodes, cur_node))
1737           {
1738             int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1739             re_node_set_remove_at (dest_nodes, idx);
1740           }
1741       }
1742     re_node_set_free (&except_nodes);
1743     return REG_NOERROR;
1744 }
1745
1746 static int
1747 check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
1748      re_match_context_t *mctx;
1749      re_node_set *limits;
1750      int dst_node, dst_idx, src_node, src_idx;
1751 {
1752   re_dfa_t *const dfa = mctx->dfa;
1753   int lim_idx, src_pos, dst_pos;
1754
1755   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1756     {
1757       int subexp_idx;
1758       struct re_backref_cache_entry *ent;
1759       ent = mctx->bkref_ents + limits->elems[lim_idx];
1760       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1761
1762       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1763                                            dfa->eclosures + dst_node,
1764                                            subexp_idx, dst_node, dst_idx);
1765       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1766                                            dfa->eclosures + src_node,
1767                                            subexp_idx, src_node, src_idx);
1768
1769       /* In case of:
1770          <src> <dst> ( <subexp> )
1771          ( <subexp> ) <src> <dst>
1772          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1773       if (src_pos == dst_pos)
1774         continue; /* This is unrelated limitation.  */
1775       else
1776         return 1;
1777     }
1778   return 0;
1779 }
1780
1781 static int
1782 check_dst_limits_calc_pos (mctx, limit, eclosures, subexp_idx, from_node,
1783                            str_idx)
1784      re_match_context_t *mctx;
1785      re_node_set *eclosures;
1786      int limit, subexp_idx, from_node, str_idx;
1787 {
1788   re_dfa_t *const dfa = mctx->dfa;
1789   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1790   int node_idx;
1791
1792   /* If we are outside the range of the subexpression, return -1 or 1.  */
1793   if (str_idx < lim->subexp_from)
1794     return -1;
1795
1796   if (lim->subexp_to < str_idx)
1797     return 1;
1798
1799   /* If we are within the subexpression, return 0.  */
1800   if (str_idx != lim->subexp_from && str_idx != lim->subexp_to)
1801     return 0;
1802
1803   /* Else, we are on the boundary: examine the nodes on the epsilon
1804      closure.  */
1805   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1806     {
1807       int node = eclosures->elems[node_idx];
1808       switch (dfa->nodes[node].type)
1809         {
1810         case OP_BACK_REF:
1811           {
1812             int bi = search_cur_bkref_entry (mctx, str_idx);
1813             for (; bi < mctx->nbkref_ents; ++bi)
1814               {
1815                 struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1816                 int dst, cpos;
1817
1818                 /* If this backreference goes beyond the point we're
1819                    examining, don't go any further.  */
1820                 if (ent->str_idx > str_idx)
1821                   break;
1822
1823                 if (ent->node != node || ent->subexp_from != ent->subexp_to)
1824                   continue;
1825
1826                 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1827                    OP_CLOSE_SUBEXP cases below.  But, if the
1828                    destination node is the same node as the source
1829                    node, don't recurse because it would cause an
1830                    infinite loop: a regex that exhibits this behavior
1831                    is ()\1*\1*  */
1832                 dst = dfa->edests[node].elems[0];
1833                 if (dst == from_node)
1834                   {
1835                     if (str_idx == lim->subexp_from)
1836                       return -1;
1837                     else /* if (str_idx == lim->subexp_to) */
1838                       return 0;
1839                   }
1840
1841                 cpos = check_dst_limits_calc_pos (mctx, limit,
1842                                                   dfa->eclosures + dst,
1843                                                   subexp_idx, dst,
1844                                                   str_idx);
1845
1846                 if (cpos == -1 && str_idx == lim->subexp_from)
1847                   return -1;
1848
1849                 if (cpos == 0 /* && str_idx == lim->lim->subexp_to */)
1850                   return 0;
1851               }
1852               break;
1853             }
1854
1855         case OP_OPEN_SUBEXP:
1856           if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx)
1857             return -1;
1858           break;
1859
1860         case OP_CLOSE_SUBEXP:
1861           if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx)
1862             return 0;
1863           break;
1864
1865         default:
1866             break;
1867         }
1868     }
1869
1870   if (str_idx == lim->subexp_to)
1871     return 1;
1872   else
1873     return 0;
1874 }
1875
1876 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1877    which are against limitations from DEST_NODES. */
1878
1879 static reg_errcode_t
1880 check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1881      re_dfa_t *dfa;
1882      re_node_set *dest_nodes;
1883      const re_node_set *candidates;
1884      re_node_set *limits;
1885      struct re_backref_cache_entry *bkref_ents;
1886      int str_idx;
1887 {
1888   reg_errcode_t err;
1889   int node_idx, lim_idx;
1890
1891   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1892     {
1893       int subexp_idx;
1894       struct re_backref_cache_entry *ent;
1895       ent = bkref_ents + limits->elems[lim_idx];
1896
1897       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1898         continue; /* This is unrelated limitation.  */
1899
1900       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1901       if (ent->subexp_to == str_idx)
1902         {
1903           int ops_node = -1;
1904           int cls_node = -1;
1905           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1906             {
1907               int node = dest_nodes->elems[node_idx];
1908               re_token_type_t type = dfa->nodes[node].type;
1909               if (type == OP_OPEN_SUBEXP
1910                   && subexp_idx == dfa->nodes[node].opr.idx)
1911                 ops_node = node;
1912               else if (type == OP_CLOSE_SUBEXP
1913                        && subexp_idx == dfa->nodes[node].opr.idx)
1914                 cls_node = node;
1915             }
1916
1917           /* Check the limitation of the open subexpression.  */
1918           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
1919           if (ops_node >= 0)
1920             {
1921               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
1922                                            candidates);
1923               if (BE (err != REG_NOERROR, 0))
1924                 return err;
1925             }
1926
1927           /* Check the limitation of the close subexpression.  */
1928           if (cls_node >= 0)
1929             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1930               {
1931                 int node = dest_nodes->elems[node_idx];
1932                 if (!re_node_set_contains (dfa->inveclosures + node,
1933                                            cls_node)
1934                     && !re_node_set_contains (dfa->eclosures + node,
1935                                               cls_node))
1936                   {
1937                     /* It is against this limitation.
1938                        Remove it form the current sifted state.  */
1939                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
1940                                                  candidates);
1941                     if (BE (err != REG_NOERROR, 0))
1942                       return err;
1943                     --node_idx;
1944                   }
1945               }
1946         }
1947       else /* (ent->subexp_to != str_idx)  */
1948         {
1949           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1950             {
1951               int node = dest_nodes->elems[node_idx];
1952               re_token_type_t type = dfa->nodes[node].type;
1953               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1954                 {
1955                   if (subexp_idx != dfa->nodes[node].opr.idx)
1956                     continue;
1957                   if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1958                       || (type == OP_OPEN_SUBEXP))
1959                     {
1960                       /* It is against this limitation.
1961                          Remove it form the current sifted state.  */
1962                       err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
1963                                                    candidates);
1964                       if (BE (err != REG_NOERROR, 0))
1965                         return err;
1966                     }
1967                 }
1968             }
1969         }
1970     }
1971   return REG_NOERROR;
1972 }
1973
1974 static reg_errcode_t
1975 sift_states_bkref (mctx, sctx, str_idx, dest_nodes)
1976      re_match_context_t *mctx;
1977      re_sift_context_t *sctx;
1978      int str_idx;
1979      re_node_set *dest_nodes;
1980 {
1981   re_dfa_t *const dfa = mctx->dfa;
1982   reg_errcode_t err;
1983   int node_idx, node;
1984   re_sift_context_t local_sctx;
1985   const re_node_set *candidates;
1986   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1987                 : &mctx->state_log[str_idx]->nodes);
1988   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
1989
1990   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1991     {
1992       int cur_bkref_idx = re_string_cur_idx (&mctx->input);
1993       re_token_type_t type;
1994       node = candidates->elems[node_idx];
1995       type = dfa->nodes[node].type;
1996       if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1997         continue;
1998       /* Avoid infinite loop for the REs like "()\1+".  */
1999       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2000         continue;
2001       if (type == OP_BACK_REF)
2002         {
2003           int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
2004           for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2005             {
2006               int disabled_idx, subexp_len, to_idx, dst_node;
2007               struct re_backref_cache_entry *entry;
2008               entry = mctx->bkref_ents + enabled_idx;
2009               if (entry->str_idx > str_idx)
2010                 break;
2011               if (entry->node != node)
2012                   continue;
2013               subexp_len = entry->subexp_to - entry->subexp_from;
2014               to_idx = str_idx + subexp_len;
2015               dst_node = (subexp_len ? dfa->nexts[node]
2016                           : dfa->edests[node].elems[0]);
2017
2018               if (to_idx > sctx->last_str_idx
2019                   || sctx->sifted_states[to_idx] == NULL
2020                   || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
2021                                            dst_node)
2022                   || check_dst_limits (mctx, &sctx->limits, node,
2023                                        str_idx, dst_node, to_idx))
2024                 continue;
2025                 {
2026                   re_dfastate_t *cur_state;
2027                   entry->flag = 0;
2028                   for (disabled_idx = enabled_idx + 1;
2029                        disabled_idx < mctx->nbkref_ents; ++disabled_idx)
2030                     {
2031                       struct re_backref_cache_entry *entry2;
2032                       entry2 = mctx->bkref_ents + disabled_idx;
2033                       if (entry2->str_idx > str_idx)
2034                         break;
2035                       entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
2036                     }
2037
2038                   if (local_sctx.sifted_states == NULL)
2039                     {
2040                       local_sctx = *sctx;
2041                       err = re_node_set_init_copy (&local_sctx.limits,
2042                                                    &sctx->limits);
2043                       if (BE (err != REG_NOERROR, 0))
2044                         goto free_return;
2045                     }
2046                   local_sctx.last_node = node;
2047                   local_sctx.last_str_idx = str_idx;
2048                   err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2049                   if (BE (err < 0, 0))
2050                     {
2051                       err = REG_ESPACE;
2052                       goto free_return;
2053                     }
2054                   cur_state = local_sctx.sifted_states[str_idx];
2055                   err = sift_states_backward (mctx, &local_sctx);
2056                   if (BE (err != REG_NOERROR, 0))
2057                     goto free_return;
2058                   if (sctx->limited_states != NULL)
2059                     {
2060                       err = merge_state_array (dfa, sctx->limited_states,
2061                                                local_sctx.sifted_states,
2062                                                str_idx + 1);
2063                       if (BE (err != REG_NOERROR, 0))
2064                         goto free_return;
2065                     }
2066                   local_sctx.sifted_states[str_idx] = cur_state;
2067                   re_node_set_remove (&local_sctx.limits, enabled_idx);
2068                   /* We must not use the variable entry here, since
2069                      mctx->bkref_ents might be realloced.  */
2070                   mctx->bkref_ents[enabled_idx].flag = 1;
2071                 }
2072             }
2073           enabled_idx = search_cur_bkref_entry (mctx, str_idx);
2074           for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2075             {
2076               struct re_backref_cache_entry *entry;
2077               entry = mctx->bkref_ents + enabled_idx;
2078               if (entry->str_idx > str_idx)
2079                 break;
2080               if (entry->node == node)
2081                 entry->flag = 0;
2082             }
2083         }
2084     }
2085   err = REG_NOERROR;
2086  free_return:
2087   if (local_sctx.sifted_states != NULL)
2088     {
2089       re_node_set_free (&local_sctx.limits);
2090     }
2091
2092   return err;
2093 }
2094
2095
2096 #ifdef RE_ENABLE_I18N
2097 static int
2098 sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
2099     const re_match_context_t *mctx;
2100     re_sift_context_t *sctx;
2101     int node_idx, str_idx, max_str_idx;
2102 {
2103   re_dfa_t *const dfa = mctx->dfa;
2104   int naccepted;
2105   /* Check the node can accept `multi byte'.  */
2106   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2107   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2108       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2109                             dfa->nexts[node_idx]))
2110     /* The node can't accept the `multi byte', or the
2111        destination was already thrown away, then the node
2112        could't accept the current input `multi byte'.   */
2113     naccepted = 0;
2114   /* Otherwise, it is sure that the node could accept
2115      `naccepted' bytes input.  */
2116   return naccepted;
2117 }
2118 #endif /* RE_ENABLE_I18N */
2119
2120 \f
2121 /* Functions for state transition.  */
2122
2123 /* Return the next state to which the current state STATE will transit by
2124    accepting the current input byte, and update STATE_LOG if necessary.
2125    If STATE can accept a multibyte char/collating element/back reference
2126    update the destination of STATE_LOG.  */
2127
2128 static re_dfastate_t *
2129 transit_state (err, mctx, state)
2130      reg_errcode_t *err;
2131      re_match_context_t *mctx;
2132      re_dfastate_t *state;
2133 {
2134   re_dfa_t *const dfa = mctx->dfa;
2135   re_dfastate_t **trtable, *next_state;
2136   unsigned char ch;
2137   int cur_idx;
2138
2139   if (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.bufs_len
2140       || (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.valid_len
2141           && mctx->input.valid_len < mctx->input.len))
2142     {
2143       *err = extend_buffers (mctx);
2144       if (BE (*err != REG_NOERROR, 0))
2145         return NULL;
2146     }
2147
2148   *err = REG_NOERROR;
2149   if (state == NULL)
2150     {
2151       next_state = state;
2152       re_string_skip_bytes (&mctx->input, 1);
2153     }
2154   else
2155     {
2156 #ifdef RE_ENABLE_I18N
2157       /* If the current state can accept multibyte.  */
2158       if (state->accept_mb)
2159         {
2160           *err = transit_state_mb (mctx, state);
2161           if (BE (*err != REG_NOERROR, 0))
2162             return NULL;
2163         }
2164 #endif /* RE_ENABLE_I18N */
2165
2166       /* Then decide the next state with the single byte.  */
2167       if (1)
2168         {
2169           /* Use transition table  */
2170           ch = re_string_fetch_byte (&mctx->input);
2171           trtable = state->trtable;
2172           if (trtable == NULL)
2173             {
2174               trtable = build_trtable (dfa, state);
2175               if (trtable == NULL)
2176                 {
2177                   *err = REG_ESPACE;
2178                   return NULL;
2179                 }
2180             }
2181           if (BE (state->word_trtable, 0))
2182             {
2183               unsigned int context;
2184               context
2185                 = re_string_context_at (&mctx->input,
2186                                         re_string_cur_idx (&mctx->input) - 1,
2187                                         mctx->eflags);
2188               if (IS_WORD_CONTEXT (context))
2189                 next_state = trtable[ch + SBC_MAX];
2190               else
2191                 next_state = trtable[ch];
2192             }
2193           else
2194             next_state = trtable[ch];
2195         }
2196 #if 0
2197       else
2198         {
2199           /* don't use transition table  */
2200           next_state = transit_state_sb (err, mctx, state);
2201           if (BE (next_state == NULL && err != REG_NOERROR, 0))
2202             return NULL;
2203         }
2204 #endif
2205     }
2206
2207   cur_idx = re_string_cur_idx (&mctx->input);
2208   /* Update the state_log if we need.  */
2209   if (mctx->state_log != NULL)
2210     {
2211       if (cur_idx > mctx->state_log_top)
2212         {
2213           mctx->state_log[cur_idx] = next_state;
2214           mctx->state_log_top = cur_idx;
2215         }
2216       else if (mctx->state_log[cur_idx] == 0)
2217         {
2218           mctx->state_log[cur_idx] = next_state;
2219         }
2220       else
2221         {
2222           re_dfastate_t *pstate;
2223           unsigned int context;
2224           re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2225           /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2226              the destination of a multibyte char/collating element/
2227              back reference.  Then the next state is the union set of
2228              these destinations and the results of the transition table.  */
2229           pstate = mctx->state_log[cur_idx];
2230           log_nodes = pstate->entrance_nodes;
2231           if (next_state != NULL)
2232             {
2233               table_nodes = next_state->entrance_nodes;
2234               *err = re_node_set_init_union (&next_nodes, table_nodes,
2235                                              log_nodes);
2236               if (BE (*err != REG_NOERROR, 0))
2237                 return NULL;
2238             }
2239           else
2240             next_nodes = *log_nodes;
2241           /* Note: We already add the nodes of the initial state,
2242                    then we don't need to add them here.  */
2243
2244           context = re_string_context_at (&mctx->input,
2245                                           re_string_cur_idx (&mctx->input) - 1,
2246                                           mctx->eflags);
2247           next_state = mctx->state_log[cur_idx]
2248             = re_acquire_state_context (err, dfa, &next_nodes, context);
2249           /* We don't need to check errors here, since the return value of
2250              this function is next_state and ERR is already set.  */
2251
2252           if (table_nodes != NULL)
2253             re_node_set_free (&next_nodes);
2254         }
2255     }
2256
2257   if (BE (dfa->nbackref, 0) && next_state != NULL)
2258     {
2259       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2260          later.  We must check them here, since the back references in the
2261          next state might use them.  */
2262       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2263                                         cur_idx);
2264       if (BE (*err != REG_NOERROR, 0))
2265         return NULL;
2266
2267       /* If the next state has back references.  */
2268       if (next_state->has_backref)
2269         {
2270           *err = transit_state_bkref (mctx, &next_state->nodes);
2271           if (BE (*err != REG_NOERROR, 0))
2272             return NULL;
2273           next_state = mctx->state_log[cur_idx];
2274         }
2275     }
2276   return next_state;
2277 }
2278
2279 /* Helper functions for transit_state.  */
2280
2281 /* From the node set CUR_NODES, pick up the nodes whose types are
2282    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2283    expression. And register them to use them later for evaluating the
2284    correspoding back references.  */
2285
2286 static reg_errcode_t
2287 check_subexp_matching_top (mctx, cur_nodes, str_idx)
2288      re_match_context_t *mctx;
2289      re_node_set *cur_nodes;
2290      int str_idx;
2291 {
2292   re_dfa_t *const dfa = mctx->dfa;
2293   int node_idx;
2294   reg_errcode_t err;
2295
2296   /* TODO: This isn't efficient.
2297            Because there might be more than one nodes whose types are
2298            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2299            nodes.
2300            E.g. RE: (a){2}  */
2301   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2302     {
2303       int node = cur_nodes->elems[node_idx];
2304       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2305           && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2306           && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2307         {
2308           err = match_ctx_add_subtop (mctx, node, str_idx);
2309           if (BE (err != REG_NOERROR, 0))
2310             return err;
2311         }
2312     }
2313   return REG_NOERROR;
2314 }
2315
2316 #if 0
2317 /* Return the next state to which the current state STATE will transit by
2318    accepting the current input byte.  */
2319
2320 static re_dfastate_t *
2321 transit_state_sb (err, mctx, state)
2322      reg_errcode_t *err;
2323      re_match_context_t *mctx;
2324      re_dfastate_t *state;
2325 {
2326   re_dfa_t *const dfa = mctx->dfa;
2327   re_node_set next_nodes;
2328   re_dfastate_t *next_state;
2329   int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2330   unsigned int context;
2331
2332   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2333   if (BE (*err != REG_NOERROR, 0))
2334     return NULL;
2335   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2336     {
2337       int cur_node = state->nodes.elems[node_cnt];
2338       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2339         {
2340           *err = re_node_set_merge (&next_nodes,
2341                                     dfa->eclosures + dfa->nexts[cur_node]);
2342           if (BE (*err != REG_NOERROR, 0))
2343             {
2344               re_node_set_free (&next_nodes);
2345               return NULL;
2346             }
2347         }
2348     }
2349   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2350   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2351   /* We don't need to check errors here, since the return value of
2352      this function is next_state and ERR is already set.  */
2353
2354   re_node_set_free (&next_nodes);
2355   re_string_skip_bytes (&mctx->input, 1);
2356   return next_state;
2357 }
2358 #endif
2359
2360 #ifdef RE_ENABLE_I18N
2361 static reg_errcode_t
2362 transit_state_mb (mctx, pstate)
2363     re_match_context_t *mctx;
2364     re_dfastate_t *pstate;
2365 {
2366   re_dfa_t *const dfa = mctx->dfa;
2367   reg_errcode_t err;
2368   int i;
2369
2370   for (i = 0; i < pstate->nodes.nelem; ++i)
2371     {
2372       re_node_set dest_nodes, *new_nodes;
2373       int cur_node_idx = pstate->nodes.elems[i];
2374       int naccepted = 0, dest_idx;
2375       unsigned int context;
2376       re_dfastate_t *dest_state;
2377
2378       if (dfa->nodes[cur_node_idx].constraint)
2379         {
2380           context = re_string_context_at (&mctx->input,
2381                                           re_string_cur_idx (&mctx->input),
2382                                           mctx->eflags);
2383           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2384                                            context))
2385             continue;
2386         }
2387
2388       /* How many bytes the node can accept?  */
2389       if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2390         naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2391                                              re_string_cur_idx (&mctx->input));
2392       if (naccepted == 0)
2393         continue;
2394
2395       /* The node can accepts `naccepted' bytes.  */
2396       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2397       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2398                                : mctx->max_mb_elem_len);
2399       err = clean_state_log_if_needed (mctx, dest_idx);
2400       if (BE (err != REG_NOERROR, 0))
2401         return err;
2402 #ifdef DEBUG
2403       assert (dfa->nexts[cur_node_idx] != -1);
2404 #endif
2405       /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2406          then we use pstate->nodes.elems[i] instead.  */
2407       new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2408
2409       dest_state = mctx->state_log[dest_idx];
2410       if (dest_state == NULL)
2411         dest_nodes = *new_nodes;
2412       else
2413         {
2414           err = re_node_set_init_union (&dest_nodes,
2415                                         dest_state->entrance_nodes, new_nodes);
2416           if (BE (err != REG_NOERROR, 0))
2417             return err;
2418         }
2419       context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2420       mctx->state_log[dest_idx]
2421         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2422       if (dest_state != NULL)
2423         re_node_set_free (&dest_nodes);
2424       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2425         return err;
2426     }
2427   return REG_NOERROR;
2428 }
2429 #endif /* RE_ENABLE_I18N */
2430
2431 static reg_errcode_t
2432 transit_state_bkref (mctx, nodes)
2433     re_match_context_t *mctx;
2434     const re_node_set *nodes;
2435 {
2436   re_dfa_t *const dfa = mctx->dfa;
2437   reg_errcode_t err;
2438   int i;
2439   int cur_str_idx = re_string_cur_idx (&mctx->input);
2440
2441   for (i = 0; i < nodes->nelem; ++i)
2442     {
2443       int dest_str_idx, prev_nelem, bkc_idx;
2444       int node_idx = nodes->elems[i];
2445       unsigned int context;
2446       const re_token_t *node = dfa->nodes + node_idx;
2447       re_node_set *new_dest_nodes;
2448
2449       /* Check whether `node' is a backreference or not.  */
2450       if (node->type != OP_BACK_REF)
2451         continue;
2452
2453       if (node->constraint)
2454         {
2455           context = re_string_context_at (&mctx->input, cur_str_idx,
2456                                           mctx->eflags);
2457           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2458             continue;
2459         }
2460
2461       /* `node' is a backreference.
2462          Check the substring which the substring matched.  */
2463       bkc_idx = mctx->nbkref_ents;
2464       err = get_subexp (mctx, node_idx, cur_str_idx);
2465       if (BE (err != REG_NOERROR, 0))
2466         goto free_return;
2467
2468       /* And add the epsilon closures (which is `new_dest_nodes') of
2469          the backreference to appropriate state_log.  */
2470 #ifdef DEBUG
2471       assert (dfa->nexts[node_idx] != -1);
2472 #endif
2473       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2474         {
2475           int subexp_len;
2476           re_dfastate_t *dest_state;
2477           struct re_backref_cache_entry *bkref_ent;
2478           bkref_ent = mctx->bkref_ents + bkc_idx;
2479           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2480             continue;
2481           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2482           new_dest_nodes = (subexp_len == 0
2483                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2484                             : dfa->eclosures + dfa->nexts[node_idx]);
2485           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2486                           - bkref_ent->subexp_from);
2487           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2488                                           mctx->eflags);
2489           dest_state = mctx->state_log[dest_str_idx];
2490           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2491                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2492           /* Add `new_dest_node' to state_log.  */
2493           if (dest_state == NULL)
2494             {
2495               mctx->state_log[dest_str_idx]
2496                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2497                                             context);
2498               if (BE (mctx->state_log[dest_str_idx] == NULL
2499                       && err != REG_NOERROR, 0))
2500                 goto free_return;
2501             }
2502           else
2503             {
2504               re_node_set dest_nodes;
2505               err = re_node_set_init_union (&dest_nodes,
2506                                             dest_state->entrance_nodes,
2507                                             new_dest_nodes);
2508               if (BE (err != REG_NOERROR, 0))
2509                 {
2510                   re_node_set_free (&dest_nodes);
2511                   goto free_return;
2512                 }
2513               mctx->state_log[dest_str_idx]
2514                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2515               re_node_set_free (&dest_nodes);
2516               if (BE (mctx->state_log[dest_str_idx] == NULL
2517                       && err != REG_NOERROR, 0))
2518                 goto free_return;
2519             }
2520           /* We need to check recursively if the backreference can epsilon
2521              transit.  */
2522           if (subexp_len == 0
2523               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2524             {
2525               err = check_subexp_matching_top (mctx, new_dest_nodes,
2526                                                cur_str_idx);
2527               if (BE (err != REG_NOERROR, 0))
2528                 goto free_return;
2529               err = transit_state_bkref (mctx, new_dest_nodes);
2530               if (BE (err != REG_NOERROR, 0))
2531                 goto free_return;
2532             }
2533         }
2534     }
2535   err = REG_NOERROR;
2536  free_return:
2537   return err;
2538 }
2539
2540 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2541    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2542    Note that we might collect inappropriate candidates here.
2543    However, the cost of checking them strictly here is too high, then we
2544    delay these checking for prune_impossible_nodes().  */
2545
2546 static reg_errcode_t
2547 get_subexp (mctx, bkref_node, bkref_str_idx)
2548      re_match_context_t *mctx;
2549      int bkref_node, bkref_str_idx;
2550 {
2551   re_dfa_t *const dfa = mctx->dfa;
2552   int subexp_num, sub_top_idx;
2553   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2554   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2555   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2556   for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
2557     {
2558       const struct re_backref_cache_entry *entry
2559         = &mctx->bkref_ents[cache_idx];
2560       if (entry->str_idx > bkref_str_idx)
2561         break;
2562       if (entry->node == bkref_node)
2563         return REG_NOERROR; /* We already checked it.  */
2564     }
2565   subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
2566
2567   /* For each sub expression  */
2568   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2569     {
2570       reg_errcode_t err;
2571       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2572       re_sub_match_last_t *sub_last;
2573       int sub_last_idx, sl_str, bkref_str_off;
2574
2575       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2576         continue; /* It isn't related.  */
2577
2578       sl_str = sub_top->str_idx;
2579       bkref_str_off = bkref_str_idx;
2580       /* At first, check the last node of sub expressions we already
2581          evaluated.  */
2582       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2583         {
2584           int sl_str_diff;
2585           sub_last = sub_top->lasts[sub_last_idx];
2586           sl_str_diff = sub_last->str_idx - sl_str;
2587           /* The matched string by the sub expression match with the substring
2588              at the back reference?  */
2589           if (sl_str_diff > 0)
2590             {
2591               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2592                 {
2593                   /* Not enough chars for a successful match.  */
2594                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2595                     break;
2596
2597                   err = clean_state_log_if_needed (mctx,
2598                                                    bkref_str_off
2599                                                    + sl_str_diff);
2600                   if (BE (err != REG_NOERROR, 0))
2601                     return err;
2602                   buf = (const char *) re_string_get_buffer (&mctx->input);
2603                 }
2604               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2605                 break; /* We don't need to search this sub expression any more.  */
2606             }
2607           bkref_str_off += sl_str_diff;
2608           sl_str += sl_str_diff;
2609           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2610                                 bkref_str_idx);
2611
2612           /* Reload buf, since the preceding call might have reallocated
2613              the buffer.  */
2614           buf = (const char *) re_string_get_buffer (&mctx->input);
2615
2616           if (err == REG_NOMATCH)
2617             continue;
2618           if (BE (err != REG_NOERROR, 0))
2619             return err;
2620         }
2621
2622       if (sub_last_idx < sub_top->nlasts)
2623         continue;
2624       if (sub_last_idx > 0)
2625         ++sl_str;
2626       /* Then, search for the other last nodes of the sub expression.  */
2627       for (; sl_str <= bkref_str_idx; ++sl_str)
2628         {
2629           int cls_node, sl_str_off;
2630           const re_node_set *nodes;
2631           sl_str_off = sl_str - sub_top->str_idx;
2632           /* The matched string by the sub expression match with the substring
2633              at the back reference?  */
2634           if (sl_str_off > 0)
2635             {
2636               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2637                 {
2638                   /* If we are at the end of the input, we cannot match.  */
2639                   if (bkref_str_off >= mctx->input.len)
2640                     break;
2641
2642                   err = extend_buffers (mctx);
2643                   if (BE (err != REG_NOERROR, 0))
2644                     return err;
2645
2646                   buf = (const char *) re_string_get_buffer (&mctx->input);
2647                 }
2648               if (buf [bkref_str_off++] != buf[sl_str - 1])
2649                 break; /* We don't need to search this sub expression
2650                           any more.  */
2651             }
2652           if (mctx->state_log[sl_str] == NULL)
2653             continue;
2654           /* Does this state have a ')' of the sub expression?  */
2655           nodes = &mctx->state_log[sl_str]->nodes;
2656           cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2657           if (cls_node == -1)
2658             continue; /* No.  */
2659           if (sub_top->path == NULL)
2660             {
2661               sub_top->path = calloc (sizeof (state_array_t),
2662                                       sl_str - sub_top->str_idx + 1);
2663               if (sub_top->path == NULL)
2664                 return REG_ESPACE;
2665             }
2666           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2667              in the current context?  */
2668           err = check_arrival (mctx, sub_top->path, sub_top->node,
2669                                sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2670           if (err == REG_NOMATCH)
2671               continue;
2672           if (BE (err != REG_NOERROR, 0))
2673               return err;
2674           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2675           if (BE (sub_last == NULL, 0))
2676             return REG_ESPACE;
2677           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2678                                 bkref_str_idx);
2679           if (err == REG_NOMATCH)
2680             continue;
2681         }
2682     }
2683   return REG_NOERROR;
2684 }
2685
2686 /* Helper functions for get_subexp().  */
2687
2688 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2689    If it can arrive, register the sub expression expressed with SUB_TOP
2690    and SUB_LAST.  */
2691
2692 static reg_errcode_t
2693 get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
2694      re_match_context_t *mctx;
2695      const re_sub_match_top_t *sub_top;
2696      re_sub_match_last_t *sub_last;
2697      int bkref_node, bkref_str;
2698 {
2699   reg_errcode_t err;
2700   int to_idx;
2701   /* Can the subexpression arrive the back reference?  */
2702   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2703                        sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2704   if (err != REG_NOERROR)
2705     return err;
2706   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2707                              sub_last->str_idx);
2708   if (BE (err != REG_NOERROR, 0))
2709     return err;
2710   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2711   return clean_state_log_if_needed (mctx, to_idx);
2712 }
2713
2714 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2715    Search '(' if FL_OPEN, or search ')' otherwise.
2716    TODO: This function isn't efficient...
2717          Because there might be more than one nodes whose types are
2718          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2719          nodes.
2720          E.g. RE: (a){2}  */
2721
2722 static int
2723 find_subexp_node (dfa, nodes, subexp_idx, type)
2724      const re_dfa_t *dfa;
2725      const re_node_set *nodes;
2726      int subexp_idx, type;
2727 {
2728   int cls_idx;
2729   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2730     {
2731       int cls_node = nodes->elems[cls_idx];
2732       const re_token_t *node = dfa->nodes + cls_node;
2733       if (node->type == type
2734           && node->opr.idx == subexp_idx)
2735         return cls_node;
2736     }
2737   return -1;
2738 }
2739
2740 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2741    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2742    heavily reused.
2743    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2744
2745 static reg_errcode_t
2746 check_arrival (mctx, path, top_node, top_str, last_node, last_str,
2747                type)
2748      re_match_context_t *mctx;
2749      state_array_t *path;
2750      int top_node, top_str, last_node, last_str, type;
2751 {
2752   re_dfa_t *const dfa = mctx->dfa;
2753   reg_errcode_t err;
2754   int subexp_num, backup_cur_idx, str_idx, null_cnt;
2755   re_dfastate_t *cur_state = NULL;
2756   re_node_set *cur_nodes, next_nodes;
2757   re_dfastate_t **backup_state_log;
2758   unsigned int context;
2759
2760   subexp_num = dfa->nodes[top_node].opr.idx;
2761   /* Extend the buffer if we need.  */
2762   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2763     {
2764       re_dfastate_t **new_array;
2765       int old_alloc = path->alloc;
2766       path->alloc += last_str + mctx->max_mb_elem_len + 1;
2767       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2768       if (new_array == NULL)
2769         {
2770           path->alloc = old_alloc;
2771           return REG_ESPACE;
2772         }
2773       path->array = new_array;
2774       memset (new_array + old_alloc, '\0',
2775               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2776     }
2777
2778   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2779
2780   /* Temporary modify MCTX.  */
2781   backup_state_log = mctx->state_log;
2782   backup_cur_idx = mctx->input.cur_idx;
2783   mctx->state_log = path->array;
2784   mctx->input.cur_idx = str_idx;
2785
2786   /* Setup initial node set.  */
2787   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2788   if (str_idx == top_str)
2789     {
2790       err = re_node_set_init_1 (&next_nodes, top_node);
2791       if (BE (err != REG_NOERROR, 0))
2792         return err;
2793       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2794       if (BE (err != REG_NOERROR, 0))
2795         {
2796           re_node_set_free (&next_nodes);
2797           return err;
2798         }
2799     }
2800   else
2801     {
2802       cur_state = mctx->state_log[str_idx];
2803       if (cur_state && cur_state->has_backref)
2804         {
2805           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2806           if (BE ( err != REG_NOERROR, 0))
2807             return err;
2808         }
2809       else
2810         re_node_set_init_empty (&next_nodes);
2811     }
2812   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2813     {
2814       if (next_nodes.nelem)
2815         {
2816           err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
2817                                     subexp_num, type);
2818           if (BE ( err != REG_NOERROR, 0))
2819             {
2820               re_node_set_free (&next_nodes);
2821               return err;
2822             }
2823         }
2824       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2825       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2826         {
2827           re_node_set_free (&next_nodes);
2828           return err;
2829         }
2830       mctx->state_log[str_idx] = cur_state;
2831     }
2832
2833   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2834     {
2835       re_node_set_empty (&next_nodes);
2836       if (mctx->state_log[str_idx + 1])
2837         {
2838           err = re_node_set_merge (&next_nodes,
2839                                    &mctx->state_log[str_idx + 1]->nodes);
2840           if (BE (err != REG_NOERROR, 0))
2841             {
2842               re_node_set_free (&next_nodes);
2843               return err;
2844             }
2845         }
2846       if (cur_state)
2847         {
2848           err = check_arrival_add_next_nodes (mctx, str_idx,
2849                                               &cur_state->nodes, &next_nodes);
2850           if (BE (err != REG_NOERROR, 0))
2851             {
2852               re_node_set_free (&next_nodes);
2853               return err;
2854             }
2855         }
2856       ++str_idx;
2857       if (next_nodes.nelem)
2858         {
2859           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2860           if (BE (err != REG_NOERROR, 0))
2861             {
2862               re_node_set_free (&next_nodes);
2863               return err;
2864             }
2865           err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
2866                                     subexp_num, type);
2867           if (BE ( err != REG_NOERROR, 0))
2868             {
2869               re_node_set_free (&next_nodes);
2870               return err;
2871             }
2872         }
2873       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2874       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2875       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2876         {
2877           re_node_set_free (&next_nodes);
2878           return err;
2879         }
2880       mctx->state_log[str_idx] = cur_state;
2881       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2882     }
2883   re_node_set_free (&next_nodes);
2884   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2885                : &mctx->state_log[last_str]->nodes);
2886   path->next_idx = str_idx;
2887
2888   /* Fix MCTX.  */
2889   mctx->state_log = backup_state_log;
2890   mctx->input.cur_idx = backup_cur_idx;
2891
2892   /* Then check the current node set has the node LAST_NODE.  */
2893   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2894     return REG_NOERROR;
2895
2896   return REG_NOMATCH;
2897 }
2898
2899 /* Helper functions for check_arrival.  */
2900
2901 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2902    to NEXT_NODES.
2903    TODO: This function is similar to the functions transit_state*(),
2904          however this function has many additional works.
2905          Can't we unify them?  */
2906
2907 static reg_errcode_t
2908 check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
2909      re_match_context_t *mctx;
2910      int str_idx;
2911      re_node_set *cur_nodes, *next_nodes;
2912 {
2913   re_dfa_t *const dfa = mctx->dfa;
2914   int cur_idx;
2915   reg_errcode_t err;
2916   re_node_set union_set;
2917   re_node_set_init_empty (&union_set);
2918   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2919     {
2920       int naccepted = 0;
2921       int cur_node = cur_nodes->elems[cur_idx];
2922       re_token_type_t type = dfa->nodes[cur_node].type;
2923       if (IS_EPSILON_NODE (type))
2924         continue;
2925 #ifdef RE_ENABLE_I18N
2926       /* If the node may accept `multi byte'.  */
2927       if (ACCEPT_MB_NODE (type))
2928         {
2929           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
2930                                                str_idx);
2931           if (naccepted > 1)
2932             {
2933               re_dfastate_t *dest_state;
2934               int next_node = dfa->nexts[cur_node];
2935               int next_idx = str_idx + naccepted;
2936               dest_state = mctx->state_log[next_idx];
2937               re_node_set_empty (&union_set);
2938               if (dest_state)
2939                 {
2940                   err = re_node_set_merge (&union_set, &dest_state->nodes);
2941                   if (BE (err != REG_NOERROR, 0))
2942                     {
2943                       re_node_set_free (&union_set);
2944                       return err;
2945                     }
2946                 }
2947               err = re_node_set_insert (&union_set, next_node);
2948               if (BE (err < 0, 0))
2949                 {
2950                   re_node_set_free (&union_set);
2951                   return REG_ESPACE;
2952                 }
2953               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
2954                                                             &union_set);
2955               if (BE (mctx->state_log[next_idx] == NULL
2956                       && err != REG_NOERROR, 0))
2957                 {
2958                   re_node_set_free (&union_set);
2959                   return err;
2960                 }
2961             }
2962         }
2963 #endif /* RE_ENABLE_I18N */
2964       if (naccepted
2965           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
2966         {
2967           err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
2968           if (BE (err < 0, 0))
2969             {
2970               re_node_set_free (&union_set);
2971               return REG_ESPACE;
2972             }
2973         }
2974     }
2975   re_node_set_free (&union_set);
2976   return REG_NOERROR;
2977 }
2978
2979 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
2980    CUR_NODES, however exclude the nodes which are:
2981     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
2982     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
2983 */
2984
2985 static reg_errcode_t
2986 check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
2987      re_dfa_t *dfa;
2988      re_node_set *cur_nodes;
2989      int ex_subexp, type;
2990 {
2991   reg_errcode_t err;
2992   int idx, outside_node;
2993   re_node_set new_nodes;
2994 #ifdef DEBUG
2995   assert (cur_nodes->nelem);
2996 #endif
2997   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
2998   if (BE (err != REG_NOERROR, 0))
2999     return err;
3000   /* Create a new node set NEW_NODES with the nodes which are epsilon
3001      closures of the node in CUR_NODES.  */
3002
3003   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3004     {
3005       int cur_node = cur_nodes->elems[idx];
3006       re_node_set *eclosure = dfa->eclosures + cur_node;
3007       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3008       if (outside_node == -1)
3009         {
3010           /* There are no problematic nodes, just merge them.  */
3011           err = re_node_set_merge (&new_nodes, eclosure);
3012           if (BE (err != REG_NOERROR, 0))
3013             {
3014               re_node_set_free (&new_nodes);
3015               return err;
3016             }
3017         }
3018       else
3019         {
3020           /* There are problematic nodes, re-calculate incrementally.  */
3021           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3022                                               ex_subexp, type);
3023           if (BE (err != REG_NOERROR, 0))
3024             {
3025               re_node_set_free (&new_nodes);
3026               return err;
3027             }
3028         }
3029     }
3030   re_node_set_free (cur_nodes);
3031   *cur_nodes = new_nodes;
3032   return REG_NOERROR;
3033 }
3034
3035 /* Helper function for check_arrival_expand_ecl.
3036    Check incrementally the epsilon closure of TARGET, and if it isn't
3037    problematic append it to DST_NODES.  */
3038
3039 static reg_errcode_t
3040 check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
3041      re_dfa_t *dfa;
3042      int target, ex_subexp, type;
3043      re_node_set *dst_nodes;
3044 {
3045   int cur_node;
3046   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3047     {
3048       int err;
3049
3050       if (dfa->nodes[cur_node].type == type
3051           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3052         {
3053           if (type == OP_CLOSE_SUBEXP)
3054             {
3055               err = re_node_set_insert (dst_nodes, cur_node);
3056               if (BE (err == -1, 0))
3057                 return REG_ESPACE;
3058             }
3059           break;
3060         }
3061       err = re_node_set_insert (dst_nodes, cur_node);
3062       if (BE (err == -1, 0))
3063         return REG_ESPACE;
3064       if (dfa->edests[cur_node].nelem == 0)
3065         break;
3066       if (dfa->edests[cur_node].nelem == 2)
3067         {
3068           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3069                                               dfa->edests[cur_node].elems[1],
3070                                               ex_subexp, type);
3071           if (BE (err != REG_NOERROR, 0))
3072             return err;
3073         }
3074       cur_node = dfa->edests[cur_node].elems[0];
3075     }
3076   return REG_NOERROR;
3077 }
3078
3079
3080 /* For all the back references in the current state, calculate the
3081    destination of the back references by the appropriate entry
3082    in MCTX->BKREF_ENTS.  */
3083
3084 static reg_errcode_t
3085 expand_bkref_cache (mctx, cur_nodes, cur_str, last_str, subexp_num,
3086                     type)
3087      re_match_context_t *mctx;
3088      int cur_str, last_str, subexp_num, type;
3089      re_node_set *cur_nodes;
3090 {
3091   re_dfa_t *const dfa = mctx->dfa;
3092   reg_errcode_t err;
3093   int cache_idx, cache_idx_start;
3094   /* The current state.  */
3095
3096   cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3097   for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
3098     {
3099       int to_idx, next_node;
3100       struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
3101       if (ent->str_idx > cur_str)
3102         break;
3103       /* Is this entry ENT is appropriate?  */
3104       if (!re_node_set_contains (cur_nodes, ent->node))
3105         continue; /* No.  */
3106
3107       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3108       /* Calculate the destination of the back reference, and append it
3109          to MCTX->STATE_LOG.  */
3110       if (to_idx == cur_str)
3111         {
3112           /* The backreference did epsilon transit, we must re-check all the
3113              node in the current state.  */
3114           re_node_set new_dests;
3115           reg_errcode_t err2, err3;
3116           next_node = dfa->edests[ent->node].elems[0];
3117           if (re_node_set_contains (cur_nodes, next_node))
3118             continue;
3119           err = re_node_set_init_1 (&new_dests, next_node);
3120           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3121           err3 = re_node_set_merge (cur_nodes, &new_dests);
3122           re_node_set_free (&new_dests);
3123           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3124                   || err3 != REG_NOERROR, 0))
3125             {
3126               err = (err != REG_NOERROR ? err
3127                      : (err2 != REG_NOERROR ? err2 : err3));
3128               return err;
3129             }
3130           /* TODO: It is still inefficient...  */
3131           cache_idx = cache_idx_start - 1;
3132           continue;
3133         }
3134       else
3135         {
3136           re_node_set union_set;
3137           next_node = dfa->nexts[ent->node];
3138           if (mctx->state_log[to_idx])
3139             {
3140               int ret;
3141               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3142                                         next_node))
3143                 continue;
3144               err = re_node_set_init_copy (&union_set,
3145                                            &mctx->state_log[to_idx]->nodes);
3146               ret = re_node_set_insert (&union_set, next_node);
3147               if (BE (err != REG_NOERROR || ret < 0, 0))
3148                 {
3149                   re_node_set_free (&union_set);
3150                   err = err != REG_NOERROR ? err : REG_ESPACE;
3151                   return err;
3152                 }
3153             }
3154           else
3155             {
3156               err = re_node_set_init_1 (&union_set, next_node);
3157               if (BE (err != REG_NOERROR, 0))
3158                 return err;
3159             }
3160           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3161           re_node_set_free (&union_set);
3162           if (BE (mctx->state_log[to_idx] == NULL
3163                   && err != REG_NOERROR, 0))
3164             return err;
3165         }
3166     }
3167   return REG_NOERROR;
3168 }
3169
3170 /* Build transition table for the state.
3171    Return the new table if succeeded, otherwise return NULL.  */
3172
3173 static re_dfastate_t **
3174 build_trtable (dfa, state)
3175     re_dfa_t *dfa;
3176     re_dfastate_t *state;
3177 {
3178   reg_errcode_t err;
3179   int i, j, ch;
3180   unsigned int elem, mask;
3181   int dests_node_malloced = 0, dest_states_malloced = 0;
3182   int ndests; /* Number of the destination states from `state'.  */
3183   re_dfastate_t **trtable;
3184   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3185   re_node_set follows, *dests_node;
3186   bitset *dests_ch;
3187   bitset acceptable;
3188
3189   /* We build DFA states which corresponds to the destination nodes
3190      from `state'.  `dests_node[i]' represents the nodes which i-th
3191      destination state contains, and `dests_ch[i]' represents the
3192      characters which i-th destination state accepts.  */
3193 #ifdef _LIBC
3194   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3195     dests_node = (re_node_set *)
3196                  alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3197   else
3198 #endif
3199     {
3200       dests_node = (re_node_set *)
3201                    malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3202       if (BE (dests_node == NULL, 0))
3203         return NULL;
3204       dests_node_malloced = 1;
3205     }
3206   dests_ch = (bitset *) (dests_node + SBC_MAX);
3207
3208   /* Initialize transiton table.  */
3209   state->word_trtable = 0;
3210
3211   /* At first, group all nodes belonging to `state' into several
3212      destinations.  */
3213   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3214   if (BE (ndests <= 0, 0))
3215     {
3216       if (dests_node_malloced)
3217         free (dests_node);
3218       /* Return NULL in case of an error, trtable otherwise.  */
3219       if (ndests == 0)
3220         {
3221           state->trtable = (re_dfastate_t **)
3222             calloc (sizeof (re_dfastate_t *), SBC_MAX);;
3223           return state->trtable;
3224         }
3225       return NULL;
3226     }
3227
3228   err = re_node_set_alloc (&follows, ndests + 1);
3229   if (BE (err != REG_NOERROR, 0))
3230     goto out_free;
3231
3232 #ifdef _LIBC
3233   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3234                          + ndests * 3 * sizeof (re_dfastate_t *)))
3235     dest_states = (re_dfastate_t **)
3236                   alloca (ndests * 3 * sizeof (re_dfastate_t *));
3237   else
3238 #endif
3239     {
3240       dest_states = (re_dfastate_t **)
3241                     malloc (ndests * 3 * sizeof (re_dfastate_t *));
3242       if (BE (dest_states == NULL, 0))
3243         {
3244 out_free:
3245           if (dest_states_malloced)
3246             free (dest_states);
3247           re_node_set_free (&follows);
3248           for (i = 0; i < ndests; ++i)
3249             re_node_set_free (dests_node + i);
3250           if (dests_node_malloced)
3251             free (dests_node);
3252           return NULL;
3253         }
3254       dest_states_malloced = 1;
3255     }
3256   dest_states_word = dest_states + ndests;
3257   dest_states_nl = dest_states_word + ndests;
3258   bitset_empty (acceptable);
3259
3260   /* Then build the states for all destinations.  */
3261   for (i = 0; i < ndests; ++i)
3262     {
3263       int next_node;
3264       re_node_set_empty (&follows);
3265       /* Merge the follows of this destination states.  */
3266       for (j = 0; j < dests_node[i].nelem; ++j)
3267         {
3268           next_node = dfa->nexts[dests_node[i].elems[j]];
3269           if (next_node != -1)
3270             {
3271               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3272               if (BE (err != REG_NOERROR, 0))
3273                 goto out_free;
3274             }
3275         }
3276       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3277       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3278         goto out_free;
3279       /* If the new state has context constraint,
3280          build appropriate states for these contexts.  */
3281       if (dest_states[i]->has_constraint)
3282         {
3283           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3284                                                           CONTEXT_WORD);
3285           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3286             goto out_free;
3287
3288           if (dest_states[i] != dest_states_word[i]
3289               && dfa->mb_cur_max > 1)
3290             state->word_trtable = 1;
3291
3292           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3293                                                         CONTEXT_NEWLINE);
3294           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3295             goto out_free;
3296         }
3297       else
3298         {
3299           dest_states_word[i] = dest_states[i];
3300           dest_states_nl[i] = dest_states[i];
3301         }
3302       bitset_merge (acceptable, dests_ch[i]);
3303     }
3304
3305   if (!BE (state->word_trtable, 0))
3306     {
3307       /* We don't care about whether the following character is a word
3308          character, or we are in a single-byte character set so we can
3309          discern by looking at the character code: allocate a
3310          256-entry transition table.  */
3311       trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3312       if (BE (trtable == NULL, 0))
3313         goto out_free;
3314
3315       /* For all characters ch...:  */
3316       for (i = 0; i < BITSET_UINTS; ++i)
3317         for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3318              elem;
3319              mask <<= 1, elem >>= 1, ++ch)
3320           if (BE (elem & 1, 0))
3321             {
3322               /* There must be exactly one destination which accepts
3323                  character ch.  See group_nodes_into_DFAstates.  */
3324               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3325                 ;
3326
3327               /* j-th destination accepts the word character ch.  */
3328               if (dfa->word_char[i] & mask)
3329                 trtable[ch] = dest_states_word[j];
3330               else
3331                 trtable[ch] = dest_states[j];
3332             }
3333     }
3334   else
3335     {
3336       /* We care about whether the following character is a word
3337          character, and we are in a multi-byte character set: discern
3338          by looking at the character code: build two 256-entry
3339          transition tables, one starting at trtable[0] and one
3340          starting at trtable[SBC_MAX].  */
3341       trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
3342                                            2 * SBC_MAX);
3343       if (BE (trtable == NULL, 0))
3344         goto out_free;
3345
3346       /* For all characters ch...:  */
3347       for (i = 0; i < BITSET_UINTS; ++i)
3348         for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3349              elem;
3350              mask <<= 1, elem >>= 1, ++ch)
3351           if (BE (elem & 1, 0))
3352             {
3353               /* There must be exactly one destination which accepts
3354                  character ch.  See group_nodes_into_DFAstates.  */
3355               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3356                 ;
3357
3358               /* j-th destination accepts the word character ch.  */
3359               trtable[ch] = dest_states[j];
3360               trtable[ch + SBC_MAX] = dest_states_word[j];
3361             }
3362     }
3363
3364   /* new line */
3365   if (bitset_contain (acceptable, NEWLINE_CHAR))
3366     {
3367       /* The current state accepts newline character.  */
3368       for (j = 0; j < ndests; ++j)
3369         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3370           {
3371             /* k-th destination accepts newline character.  */
3372             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3373             if (state->word_trtable)
3374               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3375             /* There must be only one destination which accepts
3376                newline.  See group_nodes_into_DFAstates.  */
3377             break;
3378           }
3379     }
3380
3381   if (dest_states_malloced)
3382     free (dest_states);
3383
3384   re_node_set_free (&follows);
3385   for (i = 0; i < ndests; ++i)
3386     re_node_set_free (dests_node + i);
3387
3388   if (dests_node_malloced)
3389     free (dests_node);
3390
3391   state->trtable = trtable;
3392   return trtable;
3393 }
3394
3395 /* Group all nodes belonging to STATE into several destinations.
3396    Then for all destinations, set the nodes belonging to the destination
3397    to DESTS_NODE[i] and set the characters accepted by the destination
3398    to DEST_CH[i].  This function return the number of destinations.  */
3399
3400 static int
3401 group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
3402     re_dfa_t *dfa;
3403     const re_dfastate_t *state;
3404     re_node_set *dests_node;
3405     bitset *dests_ch;
3406 {
3407   reg_errcode_t err;
3408   int i, j, k;
3409   int ndests; /* Number of the destinations from `state'.  */
3410   bitset accepts; /* Characters a node can accept.  */
3411   const re_node_set *cur_nodes = &state->nodes;
3412   bitset_empty (accepts);
3413   ndests = 0;
3414
3415   /* For all the nodes belonging to `state',  */
3416   for (i = 0; i < cur_nodes->nelem; ++i)
3417     {
3418       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3419       re_token_type_t type = node->type;
3420       unsigned int constraint = node->constraint;
3421
3422       /* Enumerate all single byte character this node can accept.  */
3423       if (type == CHARACTER)
3424         bitset_set (accepts, node->opr.c);
3425       else if (type == SIMPLE_BRACKET)
3426         {
3427           bitset_merge (accepts, node->opr.sbcset);
3428         }
3429       else if (type == OP_PERIOD)
3430         {
3431 #ifdef RE_ENABLE_I18N
3432           if (dfa->mb_cur_max > 1)
3433             bitset_merge (accepts, dfa->sb_char);
3434           else
3435 #endif
3436             bitset_set_all (accepts);
3437           if (!(dfa->syntax & RE_DOT_NEWLINE))
3438             bitset_clear (accepts, '\n');
3439           if (dfa->syntax & RE_DOT_NOT_NULL)
3440             bitset_clear (accepts, '\0');
3441         }
3442 #ifdef RE_ENABLE_I18N
3443       else if (type == OP_UTF8_PERIOD)
3444         {
3445           memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3446           if (!(dfa->syntax & RE_DOT_NEWLINE))
3447             bitset_clear (accepts, '\n');
3448           if (dfa->syntax & RE_DOT_NOT_NULL)
3449             bitset_clear (accepts, '\0');
3450         }
3451 #endif
3452       else
3453         continue;
3454
3455       /* Check the `accepts' and sift the characters which are not
3456          match it the context.  */
3457       if (constraint)
3458         {
3459           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3460             {
3461               int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3462               bitset_empty (accepts);
3463               if (accepts_newline)
3464                 bitset_set (accepts, NEWLINE_CHAR);
3465               else
3466                 continue;
3467             }
3468           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3469             {
3470               bitset_empty (accepts);
3471               continue;
3472             }
3473
3474           if (constraint & NEXT_WORD_CONSTRAINT)
3475             {
3476               unsigned int any_set = 0;
3477               if (type == CHARACTER && !node->word_char)
3478                 {
3479                   bitset_empty (accepts);
3480                   continue;
3481                 }
3482 #ifdef RE_ENABLE_I18N
3483               if (dfa->mb_cur_max > 1)
3484                 for (j = 0; j < BITSET_UINTS; ++j)
3485                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3486               else
3487 #endif
3488                 for (j = 0; j < BITSET_UINTS; ++j)
3489                   any_set |= (accepts[j] &= dfa->word_char[j]);
3490               if (!any_set)
3491                 continue;
3492             }
3493           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3494             {
3495               unsigned int any_set = 0;
3496               if (type == CHARACTER && node->word_char)
3497                 {
3498                   bitset_empty (accepts);
3499                   continue;
3500                 }
3501 #ifdef RE_ENABLE_I18N
3502               if (dfa->mb_cur_max > 1)
3503                 for (j = 0; j < BITSET_UINTS; ++j)
3504                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3505               else
3506 #endif
3507                 for (j = 0; j < BITSET_UINTS; ++j)
3508                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3509               if (!any_set)
3510                 continue;
3511             }
3512         }
3513
3514       /* Then divide `accepts' into DFA states, or create a new
3515          state.  Above, we make sure that accepts is not empty.  */
3516       for (j = 0; j < ndests; ++j)
3517         {
3518           bitset intersec; /* Intersection sets, see below.  */
3519           bitset remains;
3520           /* Flags, see below.  */
3521           int has_intersec, not_subset, not_consumed;
3522
3523           /* Optimization, skip if this state doesn't accept the character.  */
3524           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3525             continue;
3526
3527           /* Enumerate the intersection set of this state and `accepts'.  */
3528           has_intersec = 0;
3529           for (k = 0; k < BITSET_UINTS; ++k)
3530             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3531           /* And skip if the intersection set is empty.  */
3532           if (!has_intersec)
3533             continue;
3534
3535           /* Then check if this state is a subset of `accepts'.  */
3536           not_subset = not_consumed = 0;
3537           for (k = 0; k < BITSET_UINTS; ++k)
3538             {
3539               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3540               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3541             }
3542
3543           /* If this state isn't a subset of `accepts', create a
3544              new group state, which has the `remains'. */
3545           if (not_subset)
3546             {
3547               bitset_copy (dests_ch[ndests], remains);
3548               bitset_copy (dests_ch[j], intersec);
3549               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3550               if (BE (err != REG_NOERROR, 0))
3551                 goto error_return;
3552               ++ndests;
3553             }
3554
3555           /* Put the position in the current group. */
3556           err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3557           if (BE (err < 0, 0))
3558             goto error_return;
3559
3560           /* If all characters are consumed, go to next node. */
3561           if (!not_consumed)
3562             break;
3563         }
3564       /* Some characters remain, create a new group. */
3565       if (j == ndests)
3566         {
3567           bitset_copy (dests_ch[ndests], accepts);
3568           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3569           if (BE (err != REG_NOERROR, 0))
3570             goto error_return;
3571           ++ndests;
3572           bitset_empty (accepts);
3573         }
3574     }
3575   return ndests;
3576  error_return:
3577   for (j = 0; j < ndests; ++j)
3578     re_node_set_free (dests_node + j);
3579   return -1;
3580 }
3581
3582 #ifdef RE_ENABLE_I18N
3583 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3584    Return the number of the bytes the node accepts.
3585    STR_IDX is the current index of the input string.
3586
3587    This function handles the nodes which can accept one character, or
3588    one collating element like '.', '[a-z]', opposite to the other nodes
3589    can only accept one byte.  */
3590
3591 static int
3592 check_node_accept_bytes (dfa, node_idx, input, str_idx)
3593     re_dfa_t *dfa;
3594     int node_idx, str_idx;
3595     const re_string_t *input;
3596 {
3597   const re_token_t *node = dfa->nodes + node_idx;
3598   int char_len, elem_len;
3599   int i;
3600
3601   if (BE (node->type == OP_UTF8_PERIOD, 0))
3602     {
3603       unsigned char c = re_string_byte_at (input, str_idx), d;
3604       if (BE (c < 0xc2, 1))
3605         return 0;
3606
3607       if (str_idx + 2 > input->len)
3608         return 0;
3609
3610       d = re_string_byte_at (input, str_idx + 1);
3611       if (c < 0xe0)
3612         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3613       else if (c < 0xf0)
3614         {
3615           char_len = 3;
3616           if (c == 0xe0 && d < 0xa0)
3617             return 0;
3618         }
3619       else if (c < 0xf8)
3620         {
3621           char_len = 4;
3622           if (c == 0xf0 && d < 0x90)
3623             return 0;
3624         }
3625       else if (c < 0xfc)
3626         {
3627           char_len = 5;
3628           if (c == 0xf8 && d < 0x88)
3629             return 0;
3630         }
3631       else if (c < 0xfe)
3632         {
3633           char_len = 6;
3634           if (c == 0xfc && d < 0x84)
3635             return 0;
3636         }
3637       else
3638         return 0;
3639
3640       if (str_idx + char_len > input->len)
3641         return 0;
3642
3643       for (i = 1; i < char_len; ++i)
3644         {
3645           d = re_string_byte_at (input, str_idx + i);
3646           if (d < 0x80 || d > 0xbf)
3647             return 0;
3648         }
3649       return char_len;
3650     }
3651
3652   char_len = re_string_char_size_at (input, str_idx);
3653   if (node->type == OP_PERIOD)
3654     {
3655       if (char_len <= 1)
3656         return 0;
3657       /* FIXME: I don't think this if is needed, as both '\n'
3658          and '\0' are char_len == 1.  */
3659       /* '.' accepts any one character except the following two cases.  */
3660       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3661            re_string_byte_at (input, str_idx) == '\n') ||
3662           ((dfa->syntax & RE_DOT_NOT_NULL) &&
3663            re_string_byte_at (input, str_idx) == '\0'))
3664         return 0;
3665       return char_len;
3666     }
3667
3668   elem_len = re_string_elem_size_at (input, str_idx);
3669   if (elem_len <= 1 && char_len <= 1)
3670     return 0;
3671
3672   if (node->type == COMPLEX_BRACKET)
3673     {
3674       const re_charset_t *cset = node->opr.mbcset;
3675 # ifdef _LIBC
3676       const unsigned char *pin = ((char *) re_string_get_buffer (input)
3677                                   + str_idx);
3678       int j;
3679       uint32_t nrules;
3680 # endif /* _LIBC */
3681       int match_len = 0;
3682       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3683                     ? re_string_wchar_at (input, str_idx) : 0);
3684
3685       /* match with multibyte character?  */
3686       for (i = 0; i < cset->nmbchars; ++i)
3687         if (wc == cset->mbchars[i])
3688           {
3689             match_len = char_len;
3690             goto check_node_accept_bytes_match;
3691           }
3692       /* match with character_class?  */
3693       for (i = 0; i < cset->nchar_classes; ++i)
3694         {
3695           wctype_t wt = cset->char_classes[i];
3696           if (__iswctype (wc, wt))
3697             {
3698               match_len = char_len;
3699               goto check_node_accept_bytes_match;
3700             }
3701         }
3702
3703 # ifdef _LIBC
3704       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3705       if (nrules != 0)
3706         {
3707           unsigned int in_collseq = 0;
3708           const int32_t *table, *indirect;
3709           const unsigned char *weights, *extra;
3710           const char *collseqwc;
3711           int32_t idx;
3712           /* This #include defines a local function!  */
3713 #  include <locale/weight.h>
3714
3715           /* match with collating_symbol?  */
3716           if (cset->ncoll_syms)
3717             extra = (const unsigned char *)
3718               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3719           for (i = 0; i < cset->ncoll_syms; ++i)
3720             {
3721               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3722               /* Compare the length of input collating element and
3723                  the length of current collating element.  */
3724               if (*coll_sym != elem_len)
3725                 continue;
3726               /* Compare each bytes.  */
3727               for (j = 0; j < *coll_sym; j++)
3728                 if (pin[j] != coll_sym[1 + j])
3729                   break;
3730               if (j == *coll_sym)
3731                 {
3732                   /* Match if every bytes is equal.  */
3733                   match_len = j;
3734                   goto check_node_accept_bytes_match;
3735                 }
3736             }
3737
3738           if (cset->nranges)
3739             {
3740               if (elem_len <= char_len)
3741                 {
3742                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3743                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3744                 }
3745               else
3746                 in_collseq = find_collation_sequence_value (pin, elem_len);
3747             }
3748           /* match with range expression?  */
3749           for (i = 0; i < cset->nranges; ++i)
3750             if (cset->range_starts[i] <= in_collseq
3751                 && in_collseq <= cset->range_ends[i])
3752               {
3753                 match_len = elem_len;
3754                 goto check_node_accept_bytes_match;
3755               }
3756
3757           /* match with equivalence_class?  */
3758           if (cset->nequiv_classes)
3759             {
3760               const unsigned char *cp = pin;
3761               table = (const int32_t *)
3762                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3763               weights = (const unsigned char *)
3764                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3765               extra = (const unsigned char *)
3766                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3767               indirect = (const int32_t *)
3768                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3769               idx = findidx (&cp);
3770               if (idx > 0)
3771                 for (i = 0; i < cset->nequiv_classes; ++i)
3772                   {
3773                     int32_t equiv_class_idx = cset->equiv_classes[i];
3774                     size_t weight_len = weights[idx];
3775                     if (weight_len == weights[equiv_class_idx])
3776                       {
3777                         int cnt = 0;
3778                         while (cnt <= weight_len
3779                                && (weights[equiv_class_idx + 1 + cnt]
3780                                    == weights[idx + 1 + cnt]))
3781                           ++cnt;
3782                         if (cnt > weight_len)
3783                           {
3784                             match_len = elem_len;
3785                             goto check_node_accept_bytes_match;
3786                           }
3787                       }
3788                   }
3789             }
3790         }
3791       else
3792 # endif /* _LIBC */
3793         {
3794           /* match with range expression?  */
3795 #if __GNUC__ >= 2
3796           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3797 #else
3798           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3799           cmp_buf[2] = wc;
3800 #endif
3801           for (i = 0; i < cset->nranges; ++i)
3802             {
3803               cmp_buf[0] = cset->range_starts[i];
3804               cmp_buf[4] = cset->range_ends[i];
3805               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3806                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3807                 {
3808                   match_len = char_len;
3809                   goto check_node_accept_bytes_match;
3810                 }
3811             }
3812         }
3813     check_node_accept_bytes_match:
3814       if (!cset->non_match)
3815         return match_len;
3816       else
3817         {
3818           if (match_len > 0)
3819             return 0;
3820           else
3821             return (elem_len > char_len) ? elem_len : char_len;
3822         }
3823     }
3824   return 0;
3825 }
3826
3827 # ifdef _LIBC
3828 static unsigned int
3829 find_collation_sequence_value (mbs, mbs_len)
3830     const unsigned char *mbs;
3831     size_t mbs_len;
3832 {
3833   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3834   if (nrules == 0)
3835     {
3836       if (mbs_len == 1)
3837         {
3838           /* No valid character.  Match it as a single byte character.  */
3839           const unsigned char *collseq = (const unsigned char *)
3840             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3841           return collseq[mbs[0]];
3842         }
3843       return UINT_MAX;
3844     }
3845   else
3846     {
3847       int32_t idx;
3848       const unsigned char *extra = (const unsigned char *)
3849         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3850
3851       for (idx = 0; ;)
3852         {
3853           int mbs_cnt, found = 0;
3854           int32_t elem_mbs_len;
3855           /* Skip the name of collating element name.  */
3856           idx = idx + extra[idx] + 1;
3857           elem_mbs_len = extra[idx++];
3858           if (mbs_len == elem_mbs_len)
3859             {
3860               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3861                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3862                   break;
3863               if (mbs_cnt == elem_mbs_len)
3864                 /* Found the entry.  */
3865                 found = 1;
3866             }
3867           /* Skip the byte sequence of the collating element.  */
3868           idx += elem_mbs_len;
3869           /* Adjust for the alignment.  */
3870           idx = (idx + 3) & ~3;
3871           /* Skip the collation sequence value.  */
3872           idx += sizeof (uint32_t);
3873           /* Skip the wide char sequence of the collating element.  */
3874           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3875           /* If we found the entry, return the sequence value.  */
3876           if (found)
3877             return *(uint32_t *) (extra + idx);
3878           /* Skip the collation sequence value.  */
3879           idx += sizeof (uint32_t);
3880         }
3881     }
3882 }
3883 # endif /* _LIBC */
3884 #endif /* RE_ENABLE_I18N */
3885
3886 /* Check whether the node accepts the byte which is IDX-th
3887    byte of the INPUT.  */
3888
3889 static int
3890 check_node_accept (mctx, node, idx)
3891     const re_match_context_t *mctx;
3892     const re_token_t *node;
3893     int idx;
3894 {
3895   re_dfa_t *const dfa = mctx->dfa;
3896   unsigned char ch;
3897   if (node->constraint)
3898     {
3899       /* The node has constraints.  Check whether the current context
3900          satisfies the constraints.  */
3901       unsigned int context = re_string_context_at (&mctx->input, idx,
3902                                                    mctx->eflags);
3903       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3904         return 0;
3905     }
3906   ch = re_string_byte_at (&mctx->input, idx);
3907   switch (node->type)
3908     {
3909     case CHARACTER:
3910       return node->opr.c == ch;
3911     case SIMPLE_BRACKET:
3912       return bitset_contain (node->opr.sbcset, ch);
3913 #ifdef RE_ENABLE_I18N
3914     case OP_UTF8_PERIOD:
3915       if (ch >= 0x80)
3916         return 0;
3917       /* FALLTHROUGH */
3918 #endif
3919     case OP_PERIOD:
3920       return !((ch == '\n' && !(dfa->syntax & RE_DOT_NEWLINE))
3921                || (ch == '\0' && (dfa->syntax & RE_DOT_NOT_NULL)));
3922     default:
3923       return 0;
3924     }
3925 }
3926
3927 /* Extend the buffers, if the buffers have run out.  */
3928
3929 static reg_errcode_t
3930 extend_buffers (mctx)
3931      re_match_context_t *mctx;
3932 {
3933   reg_errcode_t ret;
3934   re_string_t *pstr = &mctx->input;
3935
3936   /* Double the lengthes of the buffers.  */
3937   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3938   if (BE (ret != REG_NOERROR, 0))
3939     return ret;
3940
3941   if (mctx->state_log != NULL)
3942     {
3943       /* And double the length of state_log.  */
3944       /* XXX We have no indication of the size of this buffer.  If this
3945          allocation fail we have no indication that the state_log array
3946          does not have the right size.  */
3947       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3948                                               pstr->bufs_len + 1);
3949       if (BE (new_array == NULL, 0))
3950         return REG_ESPACE;
3951       mctx->state_log = new_array;
3952     }
3953
3954   /* Then reconstruct the buffers.  */
3955   if (pstr->icase)
3956     {
3957 #ifdef RE_ENABLE_I18N
3958       if (pstr->mb_cur_max > 1)
3959         {
3960           ret = build_wcs_upper_buffer (pstr);
3961           if (BE (ret != REG_NOERROR, 0))
3962             return ret;
3963         }
3964       else
3965 #endif /* RE_ENABLE_I18N  */
3966         build_upper_buffer (pstr);
3967     }
3968   else
3969     {
3970 #ifdef RE_ENABLE_I18N
3971       if (pstr->mb_cur_max > 1)
3972         build_wcs_buffer (pstr);
3973       else
3974 #endif /* RE_ENABLE_I18N  */
3975         {
3976           if (pstr->trans != NULL)
3977             re_string_translate_buffer (pstr);
3978         }
3979     }
3980   return REG_NOERROR;
3981 }
3982
3983 \f
3984 /* Functions for matching context.  */
3985
3986 /* Initialize MCTX.  */
3987
3988 static reg_errcode_t
3989 match_ctx_init (mctx, eflags, n)
3990     re_match_context_t *mctx;
3991     int eflags, n;
3992 {
3993   mctx->eflags = eflags;
3994   mctx->match_last = -1;
3995   if (n > 0)
3996     {
3997       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3998       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
3999       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4000         return REG_ESPACE;
4001     }
4002   /* Already zero-ed by the caller.
4003      else
4004        mctx->bkref_ents = NULL;
4005      mctx->nbkref_ents = 0;
4006      mctx->nsub_tops = 0;  */
4007   mctx->abkref_ents = n;
4008   mctx->max_mb_elem_len = 1;
4009   mctx->asub_tops = n;
4010   return REG_NOERROR;
4011 }
4012
4013 /* Clean the entries which depend on the current input in MCTX.
4014    This function must be invoked when the matcher changes the start index
4015    of the input, or changes the input string.  */
4016
4017 static void
4018 match_ctx_clean (mctx)
4019     re_match_context_t *mctx;
4020 {
4021   match_ctx_free_subtops (mctx);
4022   mctx->nsub_tops = 0;
4023   mctx->nbkref_ents = 0;
4024 }
4025
4026 /* Free all the memory associated with MCTX.  */
4027
4028 static void
4029 match_ctx_free (mctx)
4030     re_match_context_t *mctx;
4031 {
4032   match_ctx_free_subtops (mctx);
4033   re_free (mctx->sub_tops);
4034   re_free (mctx->bkref_ents);
4035 }
4036
4037 /* Free all the memory associated with MCTX->SUB_TOPS.  */
4038
4039 static void
4040 match_ctx_free_subtops (mctx)
4041      re_match_context_t *mctx;
4042 {
4043   int st_idx;
4044   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4045     {
4046       int sl_idx;
4047       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4048       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4049         {
4050           re_sub_match_last_t *last = top->lasts[sl_idx];
4051           re_free (last->path.array);
4052           re_free (last);
4053         }
4054       re_free (top->lasts);
4055       if (top->path)
4056         {
4057           re_free (top->path->array);
4058           re_free (top->path);
4059         }
4060       free (top);
4061     }
4062 }
4063
4064 /* Add a new backreference entry to MCTX.
4065    Note that we assume that caller never call this function with duplicate
4066    entry, and call with STR_IDX which isn't smaller than any existing entry.
4067 */
4068
4069 static reg_errcode_t
4070 match_ctx_add_entry (mctx, node, str_idx, from, to)
4071      re_match_context_t *mctx;
4072      int node, str_idx, from, to;
4073 {
4074   if (mctx->nbkref_ents >= mctx->abkref_ents)
4075     {
4076       struct re_backref_cache_entry* new_entry;
4077       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4078                               mctx->abkref_ents * 2);
4079       if (BE (new_entry == NULL, 0))
4080         {
4081           re_free (mctx->bkref_ents);
4082           return REG_ESPACE;
4083         }
4084       mctx->bkref_ents = new_entry;
4085       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4086               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4087       mctx->abkref_ents *= 2;
4088     }
4089   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4090   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4091   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4092   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4093   mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
4094   if (mctx->max_mb_elem_len < to - from)
4095     mctx->max_mb_elem_len = to - from;
4096   return REG_NOERROR;
4097 }
4098
4099 /* Search for the first entry which has the same str_idx.
4100    Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4101
4102 static int
4103 search_cur_bkref_entry (mctx, str_idx)
4104      re_match_context_t *mctx;
4105      int str_idx;
4106 {
4107   int left, right, mid;
4108   right = mctx->nbkref_ents;
4109   for (left = 0; left < right;)
4110     {
4111       mid = (left + right) / 2;
4112       if (mctx->bkref_ents[mid].str_idx < str_idx)
4113         left = mid + 1;
4114       else
4115         right = mid;
4116     }
4117   return left;
4118 }
4119
4120 static void
4121 match_ctx_clear_flag (mctx)
4122      re_match_context_t *mctx;
4123 {
4124   int i;
4125   for (i = 0; i < mctx->nbkref_ents; ++i)
4126     mctx->bkref_ents[i].flag = 0;
4127 }
4128
4129 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4130    at STR_IDX.  */
4131
4132 static reg_errcode_t
4133 match_ctx_add_subtop (mctx, node, str_idx)
4134      re_match_context_t *mctx;
4135      int node, str_idx;
4136 {
4137 #ifdef DEBUG
4138   assert (mctx->sub_tops != NULL);
4139   assert (mctx->asub_tops > 0);
4140 #endif
4141   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4142     {
4143       int new_asub_tops = mctx->asub_tops * 2;
4144       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4145                                                    re_sub_match_top_t *,
4146                                                    new_asub_tops);
4147       if (BE (new_array == NULL, 0))
4148         return REG_ESPACE;
4149       mctx->sub_tops = new_array;
4150       mctx->asub_tops = new_asub_tops;
4151     }
4152   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4153   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4154     return REG_ESPACE;
4155   mctx->sub_tops[mctx->nsub_tops]->node = node;
4156   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4157   return REG_NOERROR;
4158 }
4159
4160 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4161    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4162
4163 static re_sub_match_last_t *
4164 match_ctx_add_sublast (subtop, node, str_idx)
4165      re_sub_match_top_t *subtop;
4166      int node, str_idx;
4167 {
4168   re_sub_match_last_t *new_entry;
4169   if (BE (subtop->nlasts == subtop->alasts, 0))
4170     {
4171       int new_alasts = 2 * subtop->alasts + 1;
4172       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4173                                                     re_sub_match_last_t *,
4174                                                     new_alasts);
4175       if (BE (new_array == NULL, 0))
4176         return NULL;
4177       subtop->lasts = new_array;
4178       subtop->alasts = new_alasts;
4179     }
4180   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4181   if (BE (new_entry != NULL, 1))
4182     {
4183       subtop->lasts[subtop->nlasts] = new_entry;
4184       new_entry->node = node;
4185       new_entry->str_idx = str_idx;
4186       ++subtop->nlasts;
4187     }
4188   return new_entry;
4189 }
4190
4191 static void
4192 sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
4193                check_subexp)
4194     re_sift_context_t *sctx;
4195     re_dfastate_t **sifted_sts, **limited_sts;
4196     int last_node, last_str_idx, check_subexp;
4197 {
4198   sctx->sifted_states = sifted_sts;
4199   sctx->limited_states = limited_sts;
4200   sctx->last_node = last_node;
4201   sctx->last_str_idx = last_str_idx;
4202   sctx->check_subexp = check_subexp;
4203   sctx->cur_bkref = -1;
4204   sctx->cls_subexp_idx = -1;
4205   re_node_set_init_empty (&sctx->limits);
4206 }