posix/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002-2017 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <http://www.gnu.org/licenses/>.  */
  19
#include <limits.h>
#include <stdint.h>
  21
  22 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  23                                      int n);
  24 static void match_ctx_clean (re_match_context_t *mctx);
  25 static void match_ctx_free (re_match_context_t *cache);
  26 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
  27                                           int str_idx, int from, int to);
  28 static int search_cur_bkref_entry (const re_match_context_t *mctx,
  29                                    int str_idx);
  30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
  31                                            int str_idx);
  32 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  33                                                    int node, int str_idx);
  34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  35                            re_dfastate_t **limited_sts, int last_node,
  36                            int last_str_idx);
  37 static reg_errcode_t re_search_internal (const regex_t *preg,
  38                                          const char *string, int length,
  39                                          int start, int range, int stop,
  40                                          size_t nmatch, regmatch_t pmatch[],
  41                                          int eflags);
  42 static int re_search_2_stub (struct re_pattern_buffer *bufp,
  43                              const char *string1, int length1,
  44                              const char *string2, int length2,
  45                              int start, int range, struct re_registers *regs,
  46                              int stop, int ret_len);
  47 static int re_search_stub (struct re_pattern_buffer *bufp,
  48                            const char *string, int length, int start,
  49                            int range, int stop, struct re_registers *regs,
  50                            int ret_len);
  51 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  52                               int nregs, int regs_allocated);
  53 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
  54 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
  55                            int *p_match_first);
  56 static int check_halt_state_context (const re_match_context_t *mctx,
  57                                      const re_dfastate_t *state, int idx);
  58 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  59                          regmatch_t *prev_idx_match, int cur_node,
  60                          int cur_idx, int nmatch);
  61 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  62                                       int str_idx, int dest_node, int nregs,
  63                                       regmatch_t *regs,
  64                                       re_node_set *eps_via_nodes);
  65 static reg_errcode_t set_regs (const regex_t *preg,
  66                                const re_match_context_t *mctx,
  67                                size_t nmatch, regmatch_t *pmatch,
  68                                int fl_backtrack);
  69 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
  70
  71 #ifdef RE_ENABLE_I18N
  72 static int sift_states_iter_mb (const re_match_context_t *mctx,
  73                                 re_sift_context_t *sctx,
  74                                 int node_idx, int str_idx, int max_str_idx);
  75 #endif /* RE_ENABLE_I18N */
  76 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  77                                            re_sift_context_t *sctx);
  78 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  79                                           re_sift_context_t *sctx, int str_idx,
  80                                           re_node_set *cur_dest);
  81 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  82                                               re_sift_context_t *sctx,
  83                                               int str_idx,
  84                                               re_node_set *dest_nodes);
  85 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
  86                                             re_node_set *dest_nodes,
  87                                             const re_node_set *candidates);
  88 static int check_dst_limits (const re_match_context_t *mctx,
  89                              re_node_set *limits,
  90                              int dst_node, int dst_idx, int src_node,
  91                              int src_idx);
  92 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
  93                                         int boundaries, int subexp_idx,
  94                                         int from_node, int bkref_idx);
  95 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
  96                                       int limit, int subexp_idx,
  97                                       int node, int str_idx,
  98                                       int bkref_idx);
  99 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
 100                                           re_node_set *dest_nodes,
 101                                           const re_node_set *candidates,
 102                                           re_node_set *limits,
 103                                           struct re_backref_cache_entry *bkref_ents,
 104                                           int str_idx);
 105 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 106                                         re_sift_context_t *sctx,
 107                                         int str_idx,
 108                                         const re_node_set *candidates);
 109 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 110                                         re_dfastate_t **dst,
 111                                         re_dfastate_t **src, int num);
 112 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 113                                          re_match_context_t *mctx);
 114 static re_dfastate_t *transit_state (reg_errcode_t *err,
 115                                      re_match_context_t *mctx,
 116                                      re_dfastate_t *state);
 117 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 118                                             re_match_context_t *mctx,
 119                                             re_dfastate_t *next_state);
 120 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 121                                                 re_node_set *cur_nodes,
 122                                                 int str_idx);
 123 #if 0
 124 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 125                                         re_match_context_t *mctx,
 126                                         re_dfastate_t *pstate);
 127 #endif
 128 #ifdef RE_ENABLE_I18N
 129 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 130                                        re_dfastate_t *pstate);
 131 #endif /* RE_ENABLE_I18N */
 132 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 133                                           const re_node_set *nodes);
 134 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 135                                  int bkref_node, int bkref_str_idx);
 136 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 137                                      const re_sub_match_top_t *sub_top,
 138                                      re_sub_match_last_t *sub_last,
 139                                      int bkref_node, int bkref_str);
 140 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 141                              int subexp_idx, int type);
 142 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 143                                     state_array_t *path, int top_node,
 144                                     int top_str, int last_node, int last_str,
 145                                     int type);
 146 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 147                                                    int str_idx,
 148                                                    re_node_set *cur_nodes,
 149                                                    re_node_set *next_nodes);
 150 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 151                                                re_node_set *cur_nodes,
 152                                                int ex_subexp, int type);
 153 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 154                                                    re_node_set *dst_nodes,
 155                                                    int target, int ex_subexp,
 156                                                    int type);
 157 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 158                                          re_node_set *cur_nodes, int cur_str,
 159                                          int subexp_num, int type);
 160 static int build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
 161 #ifdef RE_ENABLE_I18N
 162 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 163                                     const re_string_t *input, int idx);
 164 # ifdef _LIBC
 165 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 166                                                    size_t name_len);
 167 # endif /* _LIBC */
 168 #endif /* RE_ENABLE_I18N */
 169 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
 170                                        const re_dfastate_t *state,
 171                                        re_node_set *states_node,
 172                                        bitset_t *states_ch);
 173 static int check_node_accept (const re_match_context_t *mctx,
 174                               const re_token_t *node, int idx);
 175 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
 176 \f
 177 /* Entry point for POSIX code.  */
 178
 179 /* regexec searches for a given pattern, specified by PREG, in the
 180    string STRING.
 181
 182    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 183    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 184    least NMATCH elements, and we set them to the offsets of the
 185    corresponding matched substrings.
 186
 187    EFLAGS specifies `execution flags' which affect matching: if
 188    REG_NOTBOL is set, then ^ does not match at the beginning of the
 189    string; if REG_NOTEOL is set, then $ does not match at the end.
 190
 191    We return 0 if we find a match and REG_NOMATCH if not.  */
 192
 193 int
 194 regexec (const regex_t *__restrict preg, const char *__restrict string,
 195          size_t nmatch, regmatch_t pmatch[], int eflags)
 196 {
 197   reg_errcode_t err;
 198   int start, length;
 199   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 200
 201   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 202     return REG_BADPAT;
 203
 204   if (eflags & REG_STARTEND)
 205     {
 206       start = pmatch[0].rm_so;
 207       length = pmatch[0].rm_eo;
 208     }
 209   else
 210     {
 211       start = 0;
 212       length = strlen (string);
 213     }
 214
 215   __libc_lock_lock (dfa->lock);
 216   if (preg->no_sub)
 217     err = re_search_internal (preg, string, length, start, length - start,
 218                               length, 0, NULL, eflags);
 219   else
 220     err = re_search_internal (preg, string, length, start, length - start,
 221                               length, nmatch, pmatch, eflags);
 222   __libc_lock_unlock (dfa->lock);
 223   return err != REG_NOERROR;
 224 }
 225
 226 #ifdef _LIBC
 227 # include <shlib-compat.h>
 228 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 229
 230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 231 __typeof__ (__regexec) __compat_regexec;
 232
 233 int
 234 attribute_compat_text_section
 235 __compat_regexec (const regex_t *__restrict preg,
 236                   const char *__restrict string, size_t nmatch,
 237                   regmatch_t pmatch[], int eflags)
 238 {
 239   return regexec (preg, string, nmatch, pmatch,
 240                   eflags & (REG_NOTBOL | REG_NOTEOL));
 241 }
 242 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 243 # endif
 244 #endif
 245
 246 /* Entry points for GNU code.  */
 247
 248 /* re_match, re_search, re_match_2, re_search_2
 249
  250    The former two functions operate on STRING with length LENGTH,
  251    while the latter two operate on the concatenation of STRING1 and
  252    STRING2 with lengths LENGTH1 and LENGTH2, respectively.
 253
 254    re_match() matches the compiled pattern in BUFP against the string,
 255    starting at index START.
 256
 257    re_search() first tries matching at index START, then it tries to match
 258    starting from index START + 1, and so on.  The last start position tried
 259    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 260    way as re_match().)
 261
  262    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
  263    the first STOP characters of the concatenation of the strings should be
  264    considered.
 265
  266    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
  267    and all groups are stored in REGS.  (For the "_2" variants, the offsets are
  268    computed relative to the concatenation, not relative to the individual
  269    strings.)
 270
 271    On success, re_match* functions return the length of the match, re_search*
 272    return the position of the start of the match.  Return value -1 means no
 273    match was found and -2 indicates an internal error.  */
 274
 275 int
 276 re_match (struct re_pattern_buffer *bufp, const char *string, int length,
 277           int start, struct re_registers *regs)
 278 {
 279   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 280 }
 281 #ifdef _LIBC
 282 weak_alias (__re_match, re_match)
 283 #endif
 284
 285 int
 286 re_search (struct re_pattern_buffer *bufp, const char *string, int length,
 287            int start, int range, struct re_registers *regs)
 288 {
 289   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
 290 }
 291 #ifdef _LIBC
 292 weak_alias (__re_search, re_search)
 293 #endif
 294
 295 int
 296 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
 297             const char *string2, int length2, int start,
 298             struct re_registers *regs, int stop)
 299 {
 300   return re_search_2_stub (bufp, string1, length1, string2, length2,
 301                            start, 0, regs, stop, 1);
 302 }
 303 #ifdef _LIBC
 304 weak_alias (__re_match_2, re_match_2)
 305 #endif
 306
 307 int
 308 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
 309              const char *string2, int length2, int start, int range,
 310              struct re_registers *regs, int stop)
 311 {
 312   return re_search_2_stub (bufp, string1, length1, string2, length2,
 313                            start, range, regs, stop, 0);
 314 }
 315 #ifdef _LIBC
 316 weak_alias (__re_search_2, re_search_2)
 317 #endif
 318
 319 static int
 320 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
 321                   int length1, const char *string2, int length2, int start,
 322                   int range, struct re_registers *regs,
 323                   int stop, int ret_len)
 324 {
 325   const char *str;
 326   int rval;
 327   int len = length1 + length2;
 328   char *s = NULL;
 329
 330   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
 331     return -2;
 332
 333   /* Concatenate the strings.  */
 334   if (length2 > 0)
 335     if (length1 > 0)
 336       {
 337         s = re_malloc (char, len);
 338
 339         if (BE (s == NULL, 0))
 340           return -2;
 341 #ifdef _LIBC
 342         memcpy (__mempcpy (s, string1, length1), string2, length2);
 343 #else
 344         memcpy (s, string1, length1);
 345         memcpy (s + length1, string2, length2);
 346 #endif
 347         str = s;
 348       }
 349     else
 350       str = string2;
 351   else
 352     str = string1;
 353
 354   rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
 355   re_free (s);
 356   return rval;
 357 }
 358
 359 /* The parameters have the same meaning as those of re_search.
 360    Additional parameters:
 361    If RET_LEN is nonzero the length of the match is returned (re_match style);
 362    otherwise the position of the match is returned.  */
 363
 364 static int
 365 re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length,
 366                 int start, int range, int stop, struct re_registers *regs,
 367                 int ret_len)
 368 {
 369   reg_errcode_t result;
 370   regmatch_t *pmatch;
 371   int nregs, rval;
 372   int eflags = 0;
 373   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
 374
 375   /* Check for out-of-range.  */
 376   if (BE (start < 0 || start > length, 0))
 377     return -1;
 378   if (BE (start + range > length, 0))
 379     range = length - start;
 380   else if (BE (start + range < 0, 0))
 381     range = -start;
 382
 383   __libc_lock_lock (dfa->lock);
 384
 385   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 386   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 387
 388   /* Compile fastmap if we haven't yet.  */
 389   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 390     re_compile_fastmap (bufp);
 391
 392   if (BE (bufp->no_sub, 0))
 393     regs = NULL;
 394
 395   /* We need at least 1 register.  */
 396   if (regs == NULL)
 397     nregs = 1;
 398   else if (BE (bufp->regs_allocated == REGS_FIXED &&
 399                regs->num_regs < bufp->re_nsub + 1, 0))
 400     {
 401       nregs = regs->num_regs;
 402       if (BE (nregs < 1, 0))
 403         {
 404           /* Nothing can be copied to regs.  */
 405           regs = NULL;
 406           nregs = 1;
 407         }
 408     }
 409   else
 410     nregs = bufp->re_nsub + 1;
 411   pmatch = re_malloc (regmatch_t, nregs);
 412   if (BE (pmatch == NULL, 0))
 413     {
 414       rval = -2;
 415       goto out;
 416     }
 417
 418   result = re_search_internal (bufp, string, length, start, range, stop,
 419                                nregs, pmatch, eflags);
 420
 421   rval = 0;
 422
 423   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 424   if (result != REG_NOERROR)
 425     rval = -1;
 426   else if (regs != NULL)
 427     {
 428       /* If caller wants register contents data back, copy them.  */
 429       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 430                                            bufp->regs_allocated);
 431       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 432         rval = -2;
 433     }
 434
 435   if (BE (rval == 0, 1))
 436     {
 437       if (ret_len)
 438         {
 439           assert (pmatch[0].rm_so == start);
 440           rval = pmatch[0].rm_eo - start;
 441         }
 442       else
 443         rval = pmatch[0].rm_so;
 444     }
 445   re_free (pmatch);
 446  out:
 447   __libc_lock_unlock (dfa->lock);
 448   return rval;
 449 }
 450
 451 static unsigned
 452 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
 453               int regs_allocated)
 454 {
 455   int rval = REGS_REALLOCATE;
 456   int i;
 457   int need_regs = nregs + 1;
 458   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 459      uses.  */
 460
 461   /* Have the register data arrays been allocated?  */
 462   if (regs_allocated == REGS_UNALLOCATED)
 463     { /* No.  So allocate them with malloc.  */
 464       regs->start = re_malloc (regoff_t, need_regs);
 465       if (BE (regs->start == NULL, 0))
 466         return REGS_UNALLOCATED;
 467       regs->end = re_malloc (regoff_t, need_regs);
 468       if (BE (regs->end == NULL, 0))
 469         {
 470           re_free (regs->start);
 471           return REGS_UNALLOCATED;
 472         }
 473       regs->num_regs = need_regs;
 474     }
 475   else if (regs_allocated == REGS_REALLOCATE)
 476     { /* Yes.  If we need more elements than were already
 477          allocated, reallocate them.  If we need fewer, just
 478          leave it alone.  */
 479       if (BE (need_regs > regs->num_regs, 0))
 480         {
 481           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 482           regoff_t *new_end;
 483           if (BE (new_start == NULL, 0))
 484             return REGS_UNALLOCATED;
 485           new_end = re_realloc (regs->end, regoff_t, need_regs);
 486           if (BE (new_end == NULL, 0))
 487             {
 488               re_free (new_start);
 489               return REGS_UNALLOCATED;
 490             }
 491           regs->start = new_start;
 492           regs->end = new_end;
 493           regs->num_regs = need_regs;
 494         }
 495     }
 496   else
 497     {
 498       assert (regs_allocated == REGS_FIXED);
 499       /* This function may not be called with REGS_FIXED and nregs too big.  */
 500       assert (regs->num_regs >= nregs);
 501       rval = REGS_FIXED;
 502     }
 503
 504   /* Copy the regs.  */
 505   for (i = 0; i < nregs; ++i)
 506     {
 507       regs->start[i] = pmatch[i].rm_so;
 508       regs->end[i] = pmatch[i].rm_eo;
 509     }
 510   for ( ; i < regs->num_regs; ++i)
 511     regs->start[i] = regs->end[i] = -1;
 512
 513   return rval;
 514 }
 515
 516 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 517    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 518    this memory for recording register information.  STARTS and ENDS
 519    must be allocated using the malloc library routine, and must each
 520    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 521
 522    If NUM_REGS == 0, then subsequent matches should allocate their own
 523    register data.
 524
 525    Unless this function is called, the first search or match using
 526    PATTERN_BUFFER will allocate its own register data, without
 527    freeing the old data.  */
 528
 529 void
 530 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
 531                   unsigned num_regs, regoff_t *starts, regoff_t *ends)
 532 {
 533   if (num_regs)
 534     {
 535       bufp->regs_allocated = REGS_REALLOCATE;
 536       regs->num_regs = num_regs;
 537       regs->start = starts;
 538       regs->end = ends;
 539     }
 540   else
 541     {
 542       bufp->regs_allocated = REGS_UNALLOCATED;
 543       regs->num_regs = 0;
 544       regs->start = regs->end = (regoff_t *) 0;
 545     }
 546 }
 547 #ifdef _LIBC
 548 weak_alias (__re_set_registers, re_set_registers)
 549 #endif
 550 \f
 551 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 552    them unless specifically requested.  */
 553
 554 #if defined _REGEX_RE_COMP || defined _LIBC
 555 int
 556 # ifdef _LIBC
 557 weak_function
 558 # endif
 559 re_exec (const char *s)
 560 {
 561   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 562 }
 563 #endif /* _REGEX_RE_COMP */
 564 \f
 565 /* Internal entry point.  */
 566
  567 /* Searches for a compiled pattern PREG in the string STRING, whose
  568    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
  569    meanings as in regexec.  START and RANGE have the same meanings
  570    as in re_search.
 571    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 572    otherwise return the error code.
 573    Note: We assume front end functions already check ranges.
 574    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 575
 576 static reg_errcode_t
 577 __attribute_warn_unused_result__
 578 re_search_internal (const regex_t *preg, const char *string, int length,
 579                     int start, int range, int stop, size_t nmatch,
 580                     regmatch_t pmatch[], int eflags)
 581 {
 582   reg_errcode_t err;
 583   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
 584   int left_lim, right_lim, incr;
 585   int fl_longest_match, match_first, match_kind, match_last = -1;
 586   int extra_nmatch;
 587   int sb, ch;
 588 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 589   re_match_context_t mctx = { .dfa = dfa };
 590 #else
 591   re_match_context_t mctx;
 592 #endif
 593   char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
 594                    && range && !preg->can_be_null) ? preg->fastmap : NULL;
 595   RE_TRANSLATE_TYPE t = preg->translate;
 596
 597 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 598   memset (&mctx, '\0', sizeof (re_match_context_t));
 599   mctx.dfa = dfa;
 600 #endif
 601
 602   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 603   nmatch -= extra_nmatch;
 604
 605   /* Check if the DFA haven't been compiled.  */
 606   if (BE (preg->used == 0 || dfa->init_state == NULL
 607           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 608           || dfa->init_state_begbuf == NULL, 0))
 609     return REG_NOMATCH;
 610
 611 #ifdef DEBUG
 612   /* We assume front-end functions already check them.  */
 613   assert (start + range >= 0 && start + range <= length);
 614 #endif
 615
 616   /* If initial states with non-begbuf contexts have no elements,
 617      the regex must be anchored.  If preg->newline_anchor is set,
 618      we'll never use init_state_nl, so do not check it.  */
 619   if (dfa->init_state->nodes.nelem == 0
 620       && dfa->init_state_word->nodes.nelem == 0
 621       && (dfa->init_state_nl->nodes.nelem == 0
 622           || !preg->newline_anchor))
 623     {
 624       if (start != 0 && start + range != 0)
 625         return REG_NOMATCH;
 626       start = range = 0;
 627     }
 628
 629   /* We must check the longest matching, if nmatch > 0.  */
 630   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 631
 632   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 633                             preg->translate, preg->syntax & RE_ICASE, dfa);
 634   if (BE (err != REG_NOERROR, 0))
 635     goto free_return;
 636   mctx.input.stop = stop;
 637   mctx.input.raw_stop = stop;
 638   mctx.input.newline_anchor = preg->newline_anchor;
 639
 640   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 641   if (BE (err != REG_NOERROR, 0))
 642     goto free_return;
 643
 644   /* We will log all the DFA states through which the dfa pass,
 645      if nmatch > 1, or this dfa has "multibyte node", which is a
 646      back-reference or a node which can accept multibyte character or
 647      multi character collating element.  */
 648   if (nmatch > 1 || dfa->has_mb_node)
 649     {
 650       /* Avoid overflow.  */
 651       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
 652         {
 653           err = REG_ESPACE;
 654           goto free_return;
 655         }
 656
 657       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 658       if (BE (mctx.state_log == NULL, 0))
 659         {
 660           err = REG_ESPACE;
 661           goto free_return;
 662         }
 663     }
 664   else
 665     mctx.state_log = NULL;
 666
 667   match_first = start;
 668   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 669                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 670
 671   /* Check incrementally whether of not the input string match.  */
 672   incr = (range < 0) ? -1 : 1;
 673   left_lim = (range < 0) ? start + range : start;
 674   right_lim = (range < 0) ? start : start + range;
 675   sb = dfa->mb_cur_max == 1;
 676   match_kind =
 677     (fastmap
 678      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 679         | (range >= 0 ? 2 : 0)
 680         | (t != NULL ? 1 : 0))
 681      : 8);
 682
 683   for (;; match_first += incr)
 684     {
 685       err = REG_NOMATCH;
 686       if (match_first < left_lim || right_lim < match_first)
 687         goto free_return;
 688
 689       /* Advance as rapidly as possible through the string, until we
 690          find a plausible place to start matching.  This may be done
 691          with varying efficiency, so there are various possibilities:
 692          only the most common of them are specialized, in order to
 693          save on code size.  We use a switch statement for speed.  */
 694       switch (match_kind)
 695         {
 696         case 8:
 697           /* No fastmap.  */
 698           break;
 699
 700         case 7:
 701           /* Fastmap with single-byte translation, match forward.  */
 702           while (BE (match_first < right_lim, 1)
 703                  && !fastmap[t[(unsigned char) string[match_first]]])
 704             ++match_first;
 705           goto forward_match_found_start_or_reached_end;
 706
 707         case 6:
 708           /* Fastmap without translation, match forward.  */
 709           while (BE (match_first < right_lim, 1)
 710                  && !fastmap[(unsigned char) string[match_first]])
 711             ++match_first;
 712
 713         forward_match_found_start_or_reached_end:
 714           if (BE (match_first == right_lim, 0))
 715             {
 716               ch = match_first >= length
 717                        ? 0 : (unsigned char) string[match_first];
 718               if (!fastmap[t ? t[ch] : ch])
 719                 goto free_return;
 720             }
 721           break;
 722
 723         case 4:
 724         case 5:
 725           /* Fastmap without multi-byte translation, match backwards.  */
 726           while (match_first >= left_lim)
 727             {
 728               ch = match_first >= length
 729                        ? 0 : (unsigned char) string[match_first];
 730               if (fastmap[t ? t[ch] : ch])
 731                 break;
 732               --match_first;
 733             }
 734           if (match_first < left_lim)
 735             goto free_return;
 736           break;
 737
 738         default:
 739           /* In this case, we can't determine easily the current byte,
 740              since it might be a component byte of a multibyte
 741              character.  Then we use the constructed buffer instead.  */
 742           for (;;)
 743             {
 744               /* If MATCH_FIRST is out of the valid range, reconstruct the
 745                  buffers.  */
 746               unsigned int offset = match_first - mctx.input.raw_mbs_idx;
 747               if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
 748                 {
 749                   err = re_string_reconstruct (&mctx.input, match_first,
 750                                                eflags);
 751                   if (BE (err != REG_NOERROR, 0))
 752                     goto free_return;
 753
 754                   offset = match_first - mctx.input.raw_mbs_idx;
 755                 }
 756               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 757                  Note that MATCH_FIRST must not be smaller than 0.  */
 758               ch = (match_first >= length
 759                     ? 0 : re_string_byte_at (&mctx.input, offset));
 760               if (fastmap[ch])
 761                 break;
 762               match_first += incr;
 763               if (match_first < left_lim || match_first > right_lim)
 764                 {
 765                   err = REG_NOMATCH;
 766                   goto free_return;
 767                 }
 768             }
 769           break;
 770         }
 771
 772       /* Reconstruct the buffers so that the matcher can assume that
 773          the matching starts from the beginning of the buffer.  */
 774       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 775       if (BE (err != REG_NOERROR, 0))
 776         goto free_return;
 777
 778 #ifdef RE_ENABLE_I18N
      /* Don't consider this char as a possible match start if it is part,
         yet isn't the head, of a multibyte character.  */
 781       if (!sb && !re_string_first_byte (&mctx.input, 0))
 782         continue;
 783 #endif
 784
 785       /* It seems to be appropriate one, then use the matcher.  */
 786       /* We assume that the matching starts from 0.  */
 787       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 788       match_last = check_matching (&mctx, fl_longest_match,
 789                                    range >= 0 ? &match_first : NULL);
 790       if (match_last != -1)
 791         {
 792           if (BE (match_last == -2, 0))
 793             {
 794               err = REG_ESPACE;
 795               goto free_return;
 796             }
 797           else
 798             {
 799               mctx.match_last = match_last;
 800               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 801                 {
 802                   re_dfastate_t *pstate = mctx.state_log[match_last];
 803                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 804                                                              match_last);
 805                 }
 806               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 807                   || dfa->nbackref)
 808                 {
 809                   err = prune_impossible_nodes (&mctx);
 810                   if (err == REG_NOERROR)
 811                     break;
 812                   if (BE (err != REG_NOMATCH, 0))
 813                     goto free_return;
 814                   match_last = -1;
 815                 }
 816               else
 817                 break; /* We found a match.  */
 818             }
 819         }
 820
 821       match_ctx_clean (&mctx);
 822     }
 823
 824 #ifdef DEBUG
 825   assert (match_last != -1);
 826   assert (err == REG_NOERROR);
 827 #endif
 828
 829   /* Set pmatch[] if we need.  */
 830   if (nmatch > 0)
 831     {
 832       int reg_idx;
 833
 834       /* Initialize registers.  */
 835       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 836         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 837
 838       /* Set the points where matching start/end.  */
 839       pmatch[0].rm_so = 0;
 840       pmatch[0].rm_eo = mctx.match_last;
 841
 842       if (!preg->no_sub && nmatch > 1)
 843         {
 844           err = set_regs (preg, &mctx, nmatch, pmatch,
 845                           dfa->has_plural_match && dfa->nbackref > 0);
 846           if (BE (err != REG_NOERROR, 0))
 847             goto free_return;
 848         }
 849
      /* Finally, add the offset to each register, since we slid
         the buffers so that we could assume that the matching starts
         from 0.  */
 853       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 854         if (pmatch[reg_idx].rm_so != -1)
 855           {
 856 #ifdef RE_ENABLE_I18N
 857             if (BE (mctx.input.offsets_needed != 0, 0))
 858               {
 859                 pmatch[reg_idx].rm_so =
 860                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 861                    ? mctx.input.valid_raw_len
 862                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 863                 pmatch[reg_idx].rm_eo =
 864                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 865                    ? mctx.input.valid_raw_len
 866                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 867               }
 868 #else
 869             assert (mctx.input.offsets_needed == 0);
 870 #endif
 871             pmatch[reg_idx].rm_so += match_first;
 872             pmatch[reg_idx].rm_eo += match_first;
 873           }
 874       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 875         {
 876           pmatch[nmatch + reg_idx].rm_so = -1;
 877           pmatch[nmatch + reg_idx].rm_eo = -1;
 878         }
 879
 880       if (dfa->subexp_map)
 881         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 882           if (dfa->subexp_map[reg_idx] != reg_idx)
 883             {
 884               pmatch[reg_idx + 1].rm_so
 885                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 886               pmatch[reg_idx + 1].rm_eo
 887                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 888             }
 889     }
 890
 891  free_return:
 892   re_free (mctx.state_log);
 893   if (dfa->nbackref)
 894     match_ctx_free (&mctx);
 895   re_string_destruct (&mctx.input);
 896   return err;
 897 }
 898
 899 static reg_errcode_t
 900 __attribute_warn_unused_result__
 901 prune_impossible_nodes (re_match_context_t *mctx)
 902 {
 903   const re_dfa_t *const dfa = mctx->dfa;
 904   int halt_node, match_last;
 905   reg_errcode_t ret;
 906   re_dfastate_t **sifted_states;
 907   re_dfastate_t **lim_states = NULL;
 908   re_sift_context_t sctx;
 909 #ifdef DEBUG
 910   assert (mctx->state_log != NULL);
 911 #endif
 912   match_last = mctx->match_last;
 913   halt_node = mctx->last_node;
 914
 915   /* Avoid overflow.  */
 916   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
 917     return REG_ESPACE;
 918
 919   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 920   if (BE (sifted_states == NULL, 0))
 921     {
 922       ret = REG_ESPACE;
 923       goto free_return;
 924     }
 925   if (dfa->nbackref)
 926     {
 927       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 928       if (BE (lim_states == NULL, 0))
 929         {
 930           ret = REG_ESPACE;
 931           goto free_return;
 932         }
 933       while (1)
 934         {
 935           memset (lim_states, '\0',
 936                   sizeof (re_dfastate_t *) * (match_last + 1));
 937           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 938                          match_last);
 939           ret = sift_states_backward (mctx, &sctx);
 940           re_node_set_free (&sctx.limits);
 941           if (BE (ret != REG_NOERROR, 0))
 942               goto free_return;
 943           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 944             break;
 945           do
 946             {
 947               --match_last;
 948               if (match_last < 0)
 949                 {
 950                   ret = REG_NOMATCH;
 951                   goto free_return;
 952                 }
 953             } while (mctx->state_log[match_last] == NULL
 954                      || !mctx->state_log[match_last]->halt);
 955           halt_node = check_halt_state_context (mctx,
 956                                                 mctx->state_log[match_last],
 957                                                 match_last);
 958         }
 959       ret = merge_state_array (dfa, sifted_states, lim_states,
 960                                match_last + 1);
 961       re_free (lim_states);
 962       lim_states = NULL;
 963       if (BE (ret != REG_NOERROR, 0))
 964         goto free_return;
 965     }
 966   else
 967     {
 968       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
 969       ret = sift_states_backward (mctx, &sctx);
 970       re_node_set_free (&sctx.limits);
 971       if (BE (ret != REG_NOERROR, 0))
 972         goto free_return;
 973       if (sifted_states[0] == NULL)
 974         {
 975           ret = REG_NOMATCH;
 976           goto free_return;
 977         }
 978     }
 979   re_free (mctx->state_log);
 980   mctx->state_log = sifted_states;
 981   sifted_states = NULL;
 982   mctx->last_node = halt_node;
 983   mctx->match_last = match_last;
 984   ret = REG_NOERROR;
 985  free_return:
 986   re_free (sifted_states);
 987   re_free (lim_states);
 988   return ret;
 989 }
 990
 991 /* Acquire an initial state and return it.
 992    We must select appropriate initial state depending on the context,
 993    since initial states may have constraints like "\<", "^", etc..  */
 994
 995 static inline re_dfastate_t *
 996 __attribute ((always_inline))
 997 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
 998                             int idx)
 999 {
1000   const re_dfa_t *const dfa = mctx->dfa;
1001   if (dfa->init_state->has_constraint)
1002     {
1003       unsigned int context;
1004       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1005       if (IS_WORD_CONTEXT (context))
1006         return dfa->init_state_word;
1007       else if (IS_ORDINARY_CONTEXT (context))
1008         return dfa->init_state;
1009       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1010         return dfa->init_state_begbuf;
1011       else if (IS_NEWLINE_CONTEXT (context))
1012         return dfa->init_state_nl;
1013       else if (IS_BEGBUF_CONTEXT (context))
1014         {
1015           /* It is relatively rare case, then calculate on demand.  */
1016           return re_acquire_state_context (err, dfa,
1017                                            dfa->init_state->entrance_nodes,
1018                                            context);
1019         }
1020       else
1021         /* Must not happen?  */
1022         return dfa->init_state;
1023     }
1024   else
1025     return dfa->init_state;
1026 }
1027
1028 /* Check whether the regular expression match input string INPUT or not,
1029    and return the index where the matching end, return -1 if not match,
1030    or return -2 in case of an error.
1031    FL_LONGEST_MATCH means we want the POSIX longest matching.
1032    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1033    next place where we may want to try matching.
1034    Note that the matcher assume that the maching starts from the current
1035    index of the buffer.  */
1036
1037 static int
1038 __attribute_warn_unused_result__
1039 check_matching (re_match_context_t *mctx, int fl_longest_match,
1040                 int *p_match_first)
1041 {
1042   const re_dfa_t *const dfa = mctx->dfa;
1043   reg_errcode_t err;
1044   int match = 0;
1045   int match_last = -1;
1046   int cur_str_idx = re_string_cur_idx (&mctx->input);
1047   re_dfastate_t *cur_state;
1048   int at_init_state = p_match_first != NULL;
1049   int next_start_idx = cur_str_idx;
1050
1051   err = REG_NOERROR;
1052   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1053   /* An initial state must not be NULL (invalid).  */
1054   if (BE (cur_state == NULL, 0))
1055     {
1056       assert (err == REG_ESPACE);
1057       return -2;
1058     }
1059
1060   if (mctx->state_log != NULL)
1061     {
1062       mctx->state_log[cur_str_idx] = cur_state;
1063
1064       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1065          later.  E.g. Processing back references.  */
1066       if (BE (dfa->nbackref, 0))
1067         {
1068           at_init_state = 0;
1069           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1070           if (BE (err != REG_NOERROR, 0))
1071             return err;
1072
1073           if (cur_state->has_backref)
1074             {
1075               err = transit_state_bkref (mctx, &cur_state->nodes);
1076               if (BE (err != REG_NOERROR, 0))
1077                 return err;
1078             }
1079         }
1080     }
1081
1082   /* If the RE accepts NULL string.  */
1083   if (BE (cur_state->halt, 0))
1084     {
1085       if (!cur_state->has_constraint
1086           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1087         {
1088           if (!fl_longest_match)
1089             return cur_str_idx;
1090           else
1091             {
1092               match_last = cur_str_idx;
1093               match = 1;
1094             }
1095         }
1096     }
1097
1098   while (!re_string_eoi (&mctx->input))
1099     {
1100       re_dfastate_t *old_state = cur_state;
1101       int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1102
1103       if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1104            && mctx->input.bufs_len < mctx->input.len)
1105           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1106               && mctx->input.valid_len < mctx->input.len))
1107         {
1108           err = extend_buffers (mctx, next_char_idx + 1);
1109           if (BE (err != REG_NOERROR, 0))
1110             {
1111               assert (err == REG_ESPACE);
1112               return -2;
1113             }
1114         }
1115
1116       cur_state = transit_state (&err, mctx, cur_state);
1117       if (mctx->state_log != NULL)
1118         cur_state = merge_state_with_log (&err, mctx, cur_state);
1119
1120       if (cur_state == NULL)
1121         {
1122           /* Reached the invalid state or an error.  Try to recover a valid
1123              state using the state log, if available and if we have not
1124              already found a valid (even if not the longest) match.  */
1125           if (BE (err != REG_NOERROR, 0))
1126             return -2;
1127
1128           if (mctx->state_log == NULL
1129               || (match && !fl_longest_match)
1130               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1131             break;
1132         }
1133
1134       if (BE (at_init_state, 0))
1135         {
1136           if (old_state == cur_state)
1137             next_start_idx = next_char_idx;
1138           else
1139             at_init_state = 0;
1140         }
1141
1142       if (cur_state->halt)
1143         {
1144           /* Reached a halt state.
1145              Check the halt state can satisfy the current context.  */
1146           if (!cur_state->has_constraint
1147               || check_halt_state_context (mctx, cur_state,
1148                                            re_string_cur_idx (&mctx->input)))
1149             {
1150               /* We found an appropriate halt state.  */
1151               match_last = re_string_cur_idx (&mctx->input);
1152               match = 1;
1153
1154               /* We found a match, do not modify match_first below.  */
1155               p_match_first = NULL;
1156               if (!fl_longest_match)
1157                 break;
1158             }
1159         }
1160     }
1161
1162   if (p_match_first)
1163     *p_match_first += next_start_idx;
1164
1165   return match_last;
1166 }
1167
1168 /* Check NODE match the current context.  */
1169
1170 static int
1171 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1172 {
1173   re_token_type_t type = dfa->nodes[node].type;
1174   unsigned int constraint = dfa->nodes[node].constraint;
1175   if (type != END_OF_RE)
1176     return 0;
1177   if (!constraint)
1178     return 1;
1179   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1180     return 0;
1181   return 1;
1182 }
1183
1184 /* Check the halt state STATE match the current context.
1185    Return 0 if not match, if the node, STATE has, is a halt node and
1186    match the context, return the node.  */
1187
1188 static int
1189 check_halt_state_context (const re_match_context_t *mctx,
1190                           const re_dfastate_t *state, int idx)
1191 {
1192   int i;
1193   unsigned int context;
1194 #ifdef DEBUG
1195   assert (state->halt);
1196 #endif
1197   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1198   for (i = 0; i < state->nodes.nelem; ++i)
1199     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1200       return state->nodes.elems[i];
1201   return 0;
1202 }
1203
1204 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1205    corresponding to the DFA).
1206    Return the destination node, and update EPS_VIA_NODES, return -1 in case
1207    of errors.  */
1208
1209 static int
1210 proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1211                    int *pidx, int node, re_node_set *eps_via_nodes,
1212                    struct re_fail_stack_t *fs)
1213 {
1214   const re_dfa_t *const dfa = mctx->dfa;
1215   int i, err;
1216   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1217     {
1218       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1219       re_node_set *edests = &dfa->edests[node];
1220       int dest_node;
1221       err = re_node_set_insert (eps_via_nodes, node);
1222       if (BE (err < 0, 0))
1223         return -2;
1224       /* Pick up a valid destination, or return -1 if none is found.  */
1225       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1226         {
1227           int candidate = edests->elems[i];
1228           if (!re_node_set_contains (cur_nodes, candidate))
1229             continue;
1230           if (dest_node == -1)
1231             dest_node = candidate;
1232
1233           else
1234             {
1235               /* In order to avoid infinite loop like "(a*)*", return the second
1236                  epsilon-transition if the first was already considered.  */
1237               if (re_node_set_contains (eps_via_nodes, dest_node))
1238                 return candidate;
1239
1240               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1241               else if (fs != NULL
1242                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1243                                            eps_via_nodes))
1244                 return -2;
1245
1246               /* We know we are going to exit.  */
1247               break;
1248             }
1249         }
1250       return dest_node;
1251     }
1252   else
1253     {
1254       int naccepted = 0;
1255       re_token_type_t type = dfa->nodes[node].type;
1256
1257 #ifdef RE_ENABLE_I18N
1258       if (dfa->nodes[node].accept_mb)
1259         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1260       else
1261 #endif /* RE_ENABLE_I18N */
1262       if (type == OP_BACK_REF)
1263         {
1264           int subexp_idx = dfa->nodes[node].opr.idx + 1;
1265           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1266           if (fs != NULL)
1267             {
1268               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1269                 return -1;
1270               else if (naccepted)
1271                 {
1272                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1273                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1274                               naccepted) != 0)
1275                     return -1;
1276                 }
1277             }
1278
1279           if (naccepted == 0)
1280             {
1281               int dest_node;
1282               err = re_node_set_insert (eps_via_nodes, node);
1283               if (BE (err < 0, 0))
1284                 return -2;
1285               dest_node = dfa->edests[node].elems[0];
1286               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1287                                         dest_node))
1288                 return dest_node;
1289             }
1290         }
1291
1292       if (naccepted != 0
1293           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1294         {
1295           int dest_node = dfa->nexts[node];
1296           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1297           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1298                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1299                                                dest_node)))
1300             return -1;
1301           re_node_set_empty (eps_via_nodes);
1302           return dest_node;
1303         }
1304     }
1305   return -1;
1306 }
1307
1308 static reg_errcode_t
1309 __attribute_warn_unused_result__
1310 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1311                  int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1312 {
1313   reg_errcode_t err;
1314   int num = fs->num++;
1315   if (fs->num == fs->alloc)
1316     {
1317       struct re_fail_stack_ent_t *new_array;
1318       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1319                                        * fs->alloc * 2));
1320       if (new_array == NULL)
1321         return REG_ESPACE;
1322       fs->alloc *= 2;
1323       fs->stack = new_array;
1324     }
1325   fs->stack[num].idx = str_idx;
1326   fs->stack[num].node = dest_node;
1327   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1328   if (fs->stack[num].regs == NULL)
1329     return REG_ESPACE;
1330   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1331   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1332   return err;
1333 }
1334
1335 static int
1336 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1337                 regmatch_t *regs, re_node_set *eps_via_nodes)
1338 {
1339   int num = --fs->num;
1340   assert (num >= 0);
1341   *pidx = fs->stack[num].idx;
1342   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1343   re_node_set_free (eps_via_nodes);
1344   re_free (fs->stack[num].regs);
1345   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1346   return fs->stack[num].node;
1347 }
1348
1349 /* Set the positions where the subexpressions are starts/ends to registers
1350    PMATCH.
1351    Note: We assume that pmatch[0] is already set, and
1352    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1353
1354 static reg_errcode_t
1355 __attribute_warn_unused_result__
1356 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1357           regmatch_t *pmatch, int fl_backtrack)
1358 {
1359   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1360   int idx, cur_node;
1361   re_node_set eps_via_nodes;
1362   struct re_fail_stack_t *fs;
1363   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1364   regmatch_t *prev_idx_match;
1365   int prev_idx_match_malloced = 0;
1366
1367 #ifdef DEBUG
1368   assert (nmatch > 1);
1369   assert (mctx->state_log != NULL);
1370 #endif
1371   if (fl_backtrack)
1372     {
1373       fs = &fs_body;
1374       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1375       if (fs->stack == NULL)
1376         return REG_ESPACE;
1377     }
1378   else
1379     fs = NULL;
1380
1381   cur_node = dfa->init_node;
1382   re_node_set_init_empty (&eps_via_nodes);
1383
1384   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1385     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1386   else
1387     {
1388       prev_idx_match = re_malloc (regmatch_t, nmatch);
1389       if (prev_idx_match == NULL)
1390         {
1391           free_fail_stack_return (fs);
1392           return REG_ESPACE;
1393         }
1394       prev_idx_match_malloced = 1;
1395     }
1396   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1397
1398   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1399     {
1400       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1401
1402       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1403         {
1404           int reg_idx;
1405           if (fs)
1406             {
1407               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1408                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1409                   break;
1410               if (reg_idx == nmatch)
1411                 {
1412                   re_node_set_free (&eps_via_nodes);
1413                   if (prev_idx_match_malloced)
1414                     re_free (prev_idx_match);
1415                   return free_fail_stack_return (fs);
1416                 }
1417               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1418                                          &eps_via_nodes);
1419             }
1420           else
1421             {
1422               re_node_set_free (&eps_via_nodes);
1423               if (prev_idx_match_malloced)
1424                 re_free (prev_idx_match);
1425               return REG_NOERROR;
1426             }
1427         }
1428
1429       /* Proceed to next node.  */
1430       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1431                                     &eps_via_nodes, fs);
1432
1433       if (BE (cur_node < 0, 0))
1434         {
1435           if (BE (cur_node == -2, 0))
1436             {
1437               re_node_set_free (&eps_via_nodes);
1438               if (prev_idx_match_malloced)
1439                 re_free (prev_idx_match);
1440               free_fail_stack_return (fs);
1441               return REG_ESPACE;
1442             }
1443           if (fs)
1444             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1445                                        &eps_via_nodes);
1446           else
1447             {
1448               re_node_set_free (&eps_via_nodes);
1449               if (prev_idx_match_malloced)
1450                 re_free (prev_idx_match);
1451               return REG_NOMATCH;
1452             }
1453         }
1454     }
1455   re_node_set_free (&eps_via_nodes);
1456   if (prev_idx_match_malloced)
1457     re_free (prev_idx_match);
1458   return free_fail_stack_return (fs);
1459 }
1460
1461 static reg_errcode_t
1462 free_fail_stack_return (struct re_fail_stack_t *fs)
1463 {
1464   if (fs)
1465     {
1466       int fs_idx;
1467       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1468         {
1469           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1470           re_free (fs->stack[fs_idx].regs);
1471         }
1472       re_free (fs->stack);
1473     }
1474   return REG_NOERROR;
1475 }
1476
1477 static void
1478 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1479              regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1480 {
1481   int type = dfa->nodes[cur_node].type;
1482   if (type == OP_OPEN_SUBEXP)
1483     {
1484       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1485
1486       /* We are at the first node of this sub expression.  */
1487       if (reg_num < nmatch)
1488         {
1489           pmatch[reg_num].rm_so = cur_idx;
1490           pmatch[reg_num].rm_eo = -1;
1491         }
1492     }
1493   else if (type == OP_CLOSE_SUBEXP)
1494     {
1495       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1496       if (reg_num < nmatch)
1497         {
1498           /* We are at the last node of this sub expression.  */
1499           if (pmatch[reg_num].rm_so < cur_idx)
1500             {
1501               pmatch[reg_num].rm_eo = cur_idx;
1502               /* This is a non-empty match or we are not inside an optional
1503                  subexpression.  Accept this right away.  */
1504               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1505             }
1506           else
1507             {
1508               if (dfa->nodes[cur_node].opt_subexp
1509                   && prev_idx_match[reg_num].rm_so != -1)
1510                 /* We transited through an empty match for an optional
1511                    subexpression, like (a?)*, and this is not the subexp's
1512                    first match.  Copy back the old content of the registers
1513                    so that matches of an inner subexpression are undone as
1514                    well, like in ((a?))*.  */
1515                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1516               else
1517                 /* We completed a subexpression, but it may be part of
1518                    an optional one, so do not update PREV_IDX_MATCH.  */
1519                 pmatch[reg_num].rm_eo = cur_idx;
1520             }
1521         }
1522     }
1523 }
1524
/* This function checks the STATE_LOG from SCTX->last_str_idx down to 0
   and sifts the nodes in each state according to the following rules.
   The updated states are written to SCTX->sifted_states.

   Rules: We throw away the node `a' in STATE_LOG[STR_IDX] if...
     1. When STR_IDX == MATCH_LAST (the last index in the state_log):
        If `a' isn't the LAST_NODE and `a' can't epsilon-transit to
        the LAST_NODE, we throw away the node `a'.
     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts the
        string `s' and transits to `b':
        i. If `b' isn't in STATE_LOG[STR_IDX+strlen(`s')], we throw
           away the node `a'.
        ii. If `b' is in STATE_LOG[STR_IDX+strlen(`s')] but `b' has been
            thrown away, we throw away the node `a'.
     3. When 0 <= STR_IDX < MATCH_LAST and `a' epsilon-transits to `b':
        i. If `b' isn't in STATE_LOG[STR_IDX], we throw away the
           node `a'.
        ii. If `b' is in STATE_LOG[STR_IDX] but `b' has been thrown away,
            we throw away the node `a'.  */
1544
/* Nonzero if STATE is not NULL and its node set contains NODE.  */
#define STATE_NODE_CONTAINS(state, node) \
  ((state) != NULL && re_node_set_contains (&(state)->nodes, (node)))
1547
1548 static reg_errcode_t
1549 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1550 {
1551   reg_errcode_t err;
1552   int null_cnt = 0;
1553   int str_idx = sctx->last_str_idx;
1554   re_node_set cur_dest;
1555
1556 #ifdef DEBUG
1557   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1558 #endif
1559
1560   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1561      transit to the last_node and the last_node itself.  */
1562   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1563   if (BE (err != REG_NOERROR, 0))
1564     return err;
1565   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1566   if (BE (err != REG_NOERROR, 0))
1567     goto free_return;
1568
1569   /* Then check each states in the state_log.  */
1570   while (str_idx > 0)
1571     {
1572       /* Update counters.  */
1573       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1574       if (null_cnt > mctx->max_mb_elem_len)
1575         {
1576           memset (sctx->sifted_states, '\0',
1577                   sizeof (re_dfastate_t *) * str_idx);
1578           re_node_set_free (&cur_dest);
1579           return REG_NOERROR;
1580         }
1581       re_node_set_empty (&cur_dest);
1582       --str_idx;
1583
1584       if (mctx->state_log[str_idx])
1585         {
1586           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1587           if (BE (err != REG_NOERROR, 0))
1588             goto free_return;
1589         }
1590
1591       /* Add all the nodes which satisfy the following conditions:
1592          - It can epsilon transit to a node in CUR_DEST.
1593          - It is in CUR_SRC.
1594          And update state_log.  */
1595       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1596       if (BE (err != REG_NOERROR, 0))
1597         goto free_return;
1598     }
1599   err = REG_NOERROR;
1600  free_return:
1601   re_node_set_free (&cur_dest);
1602   return err;
1603 }
1604
1605 static reg_errcode_t
1606 __attribute_warn_unused_result__
1607 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1608                      int str_idx, re_node_set *cur_dest)
1609 {
1610   const re_dfa_t *const dfa = mctx->dfa;
1611   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1612   int i;
1613
1614   /* Then build the next sifted state.
1615      We build the next sifted state on `cur_dest', and update
1616      `sifted_states[str_idx]' with `cur_dest'.
1617      Note:
1618      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1619      `cur_src' points the node_set of the old `state_log[str_idx]'
1620      (with the epsilon nodes pre-filtered out).  */
1621   for (i = 0; i < cur_src->nelem; i++)
1622     {
1623       int prev_node = cur_src->elems[i];
1624       int naccepted = 0;
1625       int ret;
1626
1627 #ifdef DEBUG
1628       re_token_type_t type = dfa->nodes[prev_node].type;
1629       assert (!IS_EPSILON_NODE (type));
1630 #endif
1631 #ifdef RE_ENABLE_I18N
1632       /* If the node may accept `multi byte'.  */
1633       if (dfa->nodes[prev_node].accept_mb)
1634         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1635                                          str_idx, sctx->last_str_idx);
1636 #endif /* RE_ENABLE_I18N */
1637
1638       /* We don't check backreferences here.
1639          See update_cur_sifted_state().  */
1640       if (!naccepted
1641           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1642           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1643                                   dfa->nexts[prev_node]))
1644         naccepted = 1;
1645
1646       if (naccepted == 0)
1647         continue;
1648
1649       if (sctx->limits.nelem)
1650         {
1651           int to_idx = str_idx + naccepted;
1652           if (check_dst_limits (mctx, &sctx->limits,
1653                                 dfa->nexts[prev_node], to_idx,
1654                                 prev_node, str_idx))
1655             continue;
1656         }
1657       ret = re_node_set_insert (cur_dest, prev_node);
1658       if (BE (ret == -1, 0))
1659         return REG_ESPACE;
1660     }
1661
1662   return REG_NOERROR;
1663 }
1664
1665 /* Helper functions.  */
1666
1667 static reg_errcode_t
1668 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1669 {
1670   int top = mctx->state_log_top;
1671
1672   if ((next_state_log_idx >= mctx->input.bufs_len
1673        && mctx->input.bufs_len < mctx->input.len)
1674       || (next_state_log_idx >= mctx->input.valid_len
1675           && mctx->input.valid_len < mctx->input.len))
1676     {
1677       reg_errcode_t err;
1678       err = extend_buffers (mctx, next_state_log_idx + 1);
1679       if (BE (err != REG_NOERROR, 0))
1680         return err;
1681     }
1682
1683   if (top < next_state_log_idx)
1684     {
1685       memset (mctx->state_log + top + 1, '\0',
1686               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1687       mctx->state_log_top = next_state_log_idx;
1688     }
1689   return REG_NOERROR;
1690 }
1691
1692 static reg_errcode_t
1693 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1694                    re_dfastate_t **src, int num)
1695 {
1696   int st_idx;
1697   reg_errcode_t err;
1698   for (st_idx = 0; st_idx < num; ++st_idx)
1699     {
1700       if (dst[st_idx] == NULL)
1701         dst[st_idx] = src[st_idx];
1702       else if (src[st_idx] != NULL)
1703         {
1704           re_node_set merged_set;
1705           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1706                                         &src[st_idx]->nodes);
1707           if (BE (err != REG_NOERROR, 0))
1708             return err;
1709           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1710           re_node_set_free (&merged_set);
1711           if (BE (err != REG_NOERROR, 0))
1712             return err;
1713         }
1714     }
1715   return REG_NOERROR;
1716 }
1717
1718 static reg_errcode_t
1719 update_cur_sifted_state (const re_match_context_t *mctx,
1720                          re_sift_context_t *sctx, int str_idx,
1721                          re_node_set *dest_nodes)
1722 {
1723   const re_dfa_t *const dfa = mctx->dfa;
1724   reg_errcode_t err = REG_NOERROR;
1725   const re_node_set *candidates;
1726   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1727                 : &mctx->state_log[str_idx]->nodes);
1728
1729   if (dest_nodes->nelem == 0)
1730     sctx->sifted_states[str_idx] = NULL;
1731   else
1732     {
1733       if (candidates)
1734         {
1735           /* At first, add the nodes which can epsilon transit to a node in
1736              DEST_NODE.  */
1737           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1738           if (BE (err != REG_NOERROR, 0))
1739             return err;
1740
1741           /* Then, check the limitations in the current sift_context.  */
1742           if (sctx->limits.nelem)
1743             {
1744               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1745                                          mctx->bkref_ents, str_idx);
1746               if (BE (err != REG_NOERROR, 0))
1747                 return err;
1748             }
1749         }
1750
1751       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1752       if (BE (err != REG_NOERROR, 0))
1753         return err;
1754     }
1755
1756   if (candidates && mctx->state_log[str_idx]->has_backref)
1757     {
1758       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1759       if (BE (err != REG_NOERROR, 0))
1760         return err;
1761     }
1762   return REG_NOERROR;
1763 }
1764
1765 static reg_errcode_t
1766 __attribute_warn_unused_result__
1767 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1768                        const re_node_set *candidates)
1769 {
1770   reg_errcode_t err = REG_NOERROR;
1771   int i;
1772
1773   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1774   if (BE (err != REG_NOERROR, 0))
1775     return err;
1776
1777   if (!state->inveclosure.alloc)
1778     {
1779       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1780       if (BE (err != REG_NOERROR, 0))
1781         return REG_ESPACE;
1782       for (i = 0; i < dest_nodes->nelem; i++)
1783         {
1784           err = re_node_set_merge (&state->inveclosure,
1785                                    dfa->inveclosures + dest_nodes->elems[i]);
1786           if (BE (err != REG_NOERROR, 0))
1787             return REG_ESPACE;
1788         }
1789     }
1790   return re_node_set_add_intersect (dest_nodes, candidates,
1791                                     &state->inveclosure);
1792 }
1793
1794 static reg_errcode_t
1795 sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1796                        const re_node_set *candidates)
1797 {
1798     int ecl_idx;
1799     reg_errcode_t err;
1800     re_node_set *inv_eclosure = dfa->inveclosures + node;
1801     re_node_set except_nodes;
1802     re_node_set_init_empty (&except_nodes);
1803     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1804       {
1805         int cur_node = inv_eclosure->elems[ecl_idx];
1806         if (cur_node == node)
1807           continue;
1808         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1809           {
1810             int edst1 = dfa->edests[cur_node].elems[0];
1811             int edst2 = ((dfa->edests[cur_node].nelem > 1)
1812                          ? dfa->edests[cur_node].elems[1] : -1);
1813             if ((!re_node_set_contains (inv_eclosure, edst1)
1814                  && re_node_set_contains (dest_nodes, edst1))
1815                 || (edst2 > 0
1816                     && !re_node_set_contains (inv_eclosure, edst2)
1817                     && re_node_set_contains (dest_nodes, edst2)))
1818               {
1819                 err = re_node_set_add_intersect (&except_nodes, candidates,
1820                                                  dfa->inveclosures + cur_node);
1821                 if (BE (err != REG_NOERROR, 0))
1822                   {
1823                     re_node_set_free (&except_nodes);
1824                     return err;
1825                   }
1826               }
1827           }
1828       }
1829     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1830       {
1831         int cur_node = inv_eclosure->elems[ecl_idx];
1832         if (!re_node_set_contains (&except_nodes, cur_node))
1833           {
1834             int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1835             re_node_set_remove_at (dest_nodes, idx);
1836           }
1837       }
1838     re_node_set_free (&except_nodes);
1839     return REG_NOERROR;
1840 }
1841
1842 static int
1843 check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1844                   int dst_node, int dst_idx, int src_node, int src_idx)
1845 {
1846   const re_dfa_t *const dfa = mctx->dfa;
1847   int lim_idx, src_pos, dst_pos;
1848
1849   int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1850   int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1851   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1852     {
1853       int subexp_idx;
1854       struct re_backref_cache_entry *ent;
1855       ent = mctx->bkref_ents + limits->elems[lim_idx];
1856       subexp_idx = dfa->nodes[ent->node].opr.idx;
1857
1858       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1859                                            subexp_idx, dst_node, dst_idx,
1860                                            dst_bkref_idx);
1861       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1862                                            subexp_idx, src_node, src_idx,
1863                                            src_bkref_idx);
1864
1865       /* In case of:
1866          <src> <dst> ( <subexp> )
1867          ( <subexp> ) <src> <dst>
1868          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1869       if (src_pos == dst_pos)
1870         continue; /* This is unrelated limitation.  */
1871       else
1872         return 1;
1873     }
1874   return 0;
1875 }
1876
1877 static int
1878 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1879                              int subexp_idx, int from_node, int bkref_idx)
1880 {
1881   const re_dfa_t *const dfa = mctx->dfa;
1882   const re_node_set *eclosures = dfa->eclosures + from_node;
1883   int node_idx;
1884
1885   /* Else, we are on the boundary: examine the nodes on the epsilon
1886      closure.  */
1887   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1888     {
1889       int node = eclosures->elems[node_idx];
1890       switch (dfa->nodes[node].type)
1891         {
1892         case OP_BACK_REF:
1893           if (bkref_idx != -1)
1894             {
1895               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1896               do
1897                 {
1898                   int dst, cpos;
1899
1900                   if (ent->node != node)
1901                     continue;
1902
1903                   if (subexp_idx < BITSET_WORD_BITS
1904                       && !(ent->eps_reachable_subexps_map
1905                            & ((bitset_word_t) 1 << subexp_idx)))
1906                     continue;
1907
1908                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1909                      OP_CLOSE_SUBEXP cases below.  But, if the
1910                      destination node is the same node as the source
1911                      node, don't recurse because it would cause an
1912                      infinite loop: a regex that exhibits this behavior
1913                      is ()\1*\1*  */
1914                   dst = dfa->edests[node].elems[0];
1915                   if (dst == from_node)
1916                     {
1917                       if (boundaries & 1)
1918                         return -1;
1919                       else /* if (boundaries & 2) */
1920                         return 0;
1921                     }
1922
1923                   cpos =
1924                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1925                                                  dst, bkref_idx);
1926                   if (cpos == -1 /* && (boundaries & 1) */)
1927                     return -1;
1928                   if (cpos == 0 && (boundaries & 2))
1929                     return 0;
1930
1931                   if (subexp_idx < BITSET_WORD_BITS)
1932                     ent->eps_reachable_subexps_map
1933                       &= ~((bitset_word_t) 1 << subexp_idx);
1934                 }
1935               while (ent++->more);
1936             }
1937           break;
1938
1939         case OP_OPEN_SUBEXP:
1940           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1941             return -1;
1942           break;
1943
1944         case OP_CLOSE_SUBEXP:
1945           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1946             return 0;
1947           break;
1948
1949         default:
1950             break;
1951         }
1952     }
1953
1954   return (boundaries & 2) ? 1 : 0;
1955 }
1956
1957 static int
1958 check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
1959                            int subexp_idx, int from_node, int str_idx,
1960                            int bkref_idx)
1961 {
1962   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1963   int boundaries;
1964
1965   /* If we are outside the range of the subexpression, return -1 or 1.  */
1966   if (str_idx < lim->subexp_from)
1967     return -1;
1968
1969   if (lim->subexp_to < str_idx)
1970     return 1;
1971
1972   /* If we are within the subexpression, return 0.  */
1973   boundaries = (str_idx == lim->subexp_from);
1974   boundaries |= (str_idx == lim->subexp_to) << 1;
1975   if (boundaries == 0)
1976     return 0;
1977
1978   /* Else, examine epsilon closure.  */
1979   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1980                                       from_node, bkref_idx);
1981 }
1982
1983 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1984    which are against limitations from DEST_NODES. */
1985
1986 static reg_errcode_t
1987 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
1988                      const re_node_set *candidates, re_node_set *limits,
1989                      struct re_backref_cache_entry *bkref_ents, int str_idx)
1990 {
1991   reg_errcode_t err;
1992   int node_idx, lim_idx;
1993
1994   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1995     {
1996       int subexp_idx;
1997       struct re_backref_cache_entry *ent;
1998       ent = bkref_ents + limits->elems[lim_idx];
1999
2000       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2001         continue; /* This is unrelated limitation.  */
2002
2003       subexp_idx = dfa->nodes[ent->node].opr.idx;
2004       if (ent->subexp_to == str_idx)
2005         {
2006           int ops_node = -1;
2007           int cls_node = -1;
2008           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2009             {
2010               int node = dest_nodes->elems[node_idx];
2011               re_token_type_t type = dfa->nodes[node].type;
2012               if (type == OP_OPEN_SUBEXP
2013                   && subexp_idx == dfa->nodes[node].opr.idx)
2014                 ops_node = node;
2015               else if (type == OP_CLOSE_SUBEXP
2016                        && subexp_idx == dfa->nodes[node].opr.idx)
2017                 cls_node = node;
2018             }
2019
2020           /* Check the limitation of the open subexpression.  */
2021           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2022           if (ops_node >= 0)
2023             {
2024               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2025                                            candidates);
2026               if (BE (err != REG_NOERROR, 0))
2027                 return err;
2028             }
2029
2030           /* Check the limitation of the close subexpression.  */
2031           if (cls_node >= 0)
2032             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2033               {
2034                 int node = dest_nodes->elems[node_idx];
2035                 if (!re_node_set_contains (dfa->inveclosures + node,
2036                                            cls_node)
2037                     && !re_node_set_contains (dfa->eclosures + node,
2038                                               cls_node))
2039                   {
2040                     /* It is against this limitation.
2041                        Remove it form the current sifted state.  */
2042                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2043                                                  candidates);
2044                     if (BE (err != REG_NOERROR, 0))
2045                       return err;
2046                     --node_idx;
2047                   }
2048               }
2049         }
2050       else /* (ent->subexp_to != str_idx)  */
2051         {
2052           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2053             {
2054               int node = dest_nodes->elems[node_idx];
2055               re_token_type_t type = dfa->nodes[node].type;
2056               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2057                 {
2058                   if (subexp_idx != dfa->nodes[node].opr.idx)
2059                     continue;
2060                   /* It is against this limitation.
2061                      Remove it form the current sifted state.  */
2062                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2063                                                candidates);
2064                   if (BE (err != REG_NOERROR, 0))
2065                     return err;
2066                 }
2067             }
2068         }
2069     }
2070   return REG_NOERROR;
2071 }
2072
2073 static reg_errcode_t
2074 __attribute_warn_unused_result__
2075 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2076                    int str_idx, const re_node_set *candidates)
2077 {
2078   const re_dfa_t *const dfa = mctx->dfa;
2079   reg_errcode_t err;
2080   int node_idx, node;
2081   re_sift_context_t local_sctx;
2082   int first_idx = search_cur_bkref_entry (mctx, str_idx);
2083
2084   if (first_idx == -1)
2085     return REG_NOERROR;
2086
2087   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2088
2089   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2090     {
2091       int enabled_idx;
2092       re_token_type_t type;
2093       struct re_backref_cache_entry *entry;
2094       node = candidates->elems[node_idx];
2095       type = dfa->nodes[node].type;
2096       /* Avoid infinite loop for the REs like "()\1+".  */
2097       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2098         continue;
2099       if (type != OP_BACK_REF)
2100         continue;
2101
2102       entry = mctx->bkref_ents + first_idx;
2103       enabled_idx = first_idx;
2104       do
2105         {
2106           int subexp_len;
2107           int to_idx;
2108           int dst_node;
2109           int ret;
2110           re_dfastate_t *cur_state;
2111
2112           if (entry->node != node)
2113             continue;
2114           subexp_len = entry->subexp_to - entry->subexp_from;
2115           to_idx = str_idx + subexp_len;
2116           dst_node = (subexp_len ? dfa->nexts[node]
2117                       : dfa->edests[node].elems[0]);
2118
2119           if (to_idx > sctx->last_str_idx
2120               || sctx->sifted_states[to_idx] == NULL
2121               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2122               || check_dst_limits (mctx, &sctx->limits, node,
2123                                    str_idx, dst_node, to_idx))
2124             continue;
2125
2126           if (local_sctx.sifted_states == NULL)
2127             {
2128               local_sctx = *sctx;
2129               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2130               if (BE (err != REG_NOERROR, 0))
2131                 goto free_return;
2132             }
2133           local_sctx.last_node = node;
2134           local_sctx.last_str_idx = str_idx;
2135           ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2136           if (BE (ret < 0, 0))
2137             {
2138               err = REG_ESPACE;
2139               goto free_return;
2140             }
2141           cur_state = local_sctx.sifted_states[str_idx];
2142           err = sift_states_backward (mctx, &local_sctx);
2143           if (BE (err != REG_NOERROR, 0))
2144             goto free_return;
2145           if (sctx->limited_states != NULL)
2146             {
2147               err = merge_state_array (dfa, sctx->limited_states,
2148                                        local_sctx.sifted_states,
2149                                        str_idx + 1);
2150               if (BE (err != REG_NOERROR, 0))
2151                 goto free_return;
2152             }
2153           local_sctx.sifted_states[str_idx] = cur_state;
2154           re_node_set_remove (&local_sctx.limits, enabled_idx);
2155
2156           /* mctx->bkref_ents may have changed, reload the pointer.  */
2157           entry = mctx->bkref_ents + enabled_idx;
2158         }
2159       while (enabled_idx++, entry++->more);
2160     }
2161   err = REG_NOERROR;
2162  free_return:
2163   if (local_sctx.sifted_states != NULL)
2164     {
2165       re_node_set_free (&local_sctx.limits);
2166     }
2167
2168   return err;
2169 }
2170
2171
2172 #ifdef RE_ENABLE_I18N
2173 static int
2174 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2175                      int node_idx, int str_idx, int max_str_idx)
2176 {
2177   const re_dfa_t *const dfa = mctx->dfa;
2178   int naccepted;
2179   /* Check the node can accept `multi byte'.  */
2180   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2181   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2182       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2183                             dfa->nexts[node_idx]))
2184     /* The node can't accept the `multi byte', or the
2185        destination was already thrown away, then the node
2186        could't accept the current input `multi byte'.   */
2187     naccepted = 0;
2188   /* Otherwise, it is sure that the node could accept
2189      `naccepted' bytes input.  */
2190   return naccepted;
2191 }
2192 #endif /* RE_ENABLE_I18N */
2193
2194 \f
2195 /* Functions for state transition.  */
2196
2197 /* Return the next state to which the current state STATE will transit by
2198    accepting the current input byte, and update STATE_LOG if necessary.
2199    If STATE can accept a multibyte char/collating element/back reference
2200    update the destination of STATE_LOG.  */
2201
2202 static re_dfastate_t *
2203 __attribute_warn_unused_result__
2204 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2205                re_dfastate_t *state)
2206 {
2207   re_dfastate_t **trtable;
2208   unsigned char ch;
2209
2210 #ifdef RE_ENABLE_I18N
2211   /* If the current state can accept multibyte.  */
2212   if (BE (state->accept_mb, 0))
2213     {
2214       *err = transit_state_mb (mctx, state);
2215       if (BE (*err != REG_NOERROR, 0))
2216         return NULL;
2217     }
2218 #endif /* RE_ENABLE_I18N */
2219
2220   /* Then decide the next state with the single byte.  */
2221 #if 0
2222   if (0)
2223     /* don't use transition table  */
2224     return transit_state_sb (err, mctx, state);
2225 #endif
2226
2227   /* Use transition table  */
2228   ch = re_string_fetch_byte (&mctx->input);
2229   for (;;)
2230     {
2231       trtable = state->trtable;
2232       if (BE (trtable != NULL, 1))
2233         return trtable[ch];
2234
2235       trtable = state->word_trtable;
2236       if (BE (trtable != NULL, 1))
2237         {
2238           unsigned int context;
2239           context
2240             = re_string_context_at (&mctx->input,
2241                                     re_string_cur_idx (&mctx->input) - 1,
2242                                     mctx->eflags);
2243           if (IS_WORD_CONTEXT (context))
2244             return trtable[ch + SBC_MAX];
2245           else
2246             return trtable[ch];
2247         }
2248
2249       if (!build_trtable (mctx->dfa, state))
2250         {
2251           *err = REG_ESPACE;
2252           return NULL;
2253         }
2254
2255       /* Retry, we now have a transition table.  */
2256     }
2257 }
2258
2259 /* Update the state_log if we need */
2260 re_dfastate_t *
2261 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2262                       re_dfastate_t *next_state)
2263 {
2264   const re_dfa_t *const dfa = mctx->dfa;
2265   int cur_idx = re_string_cur_idx (&mctx->input);
2266
2267   if (cur_idx > mctx->state_log_top)
2268     {
2269       mctx->state_log[cur_idx] = next_state;
2270       mctx->state_log_top = cur_idx;
2271     }
2272   else if (mctx->state_log[cur_idx] == 0)
2273     {
2274       mctx->state_log[cur_idx] = next_state;
2275     }
2276   else
2277     {
2278       re_dfastate_t *pstate;
2279       unsigned int context;
2280       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2281       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2282          the destination of a multibyte char/collating element/
2283          back reference.  Then the next state is the union set of
2284          these destinations and the results of the transition table.  */
2285       pstate = mctx->state_log[cur_idx];
2286       log_nodes = pstate->entrance_nodes;
2287       if (next_state != NULL)
2288         {
2289           table_nodes = next_state->entrance_nodes;
2290           *err = re_node_set_init_union (&next_nodes, table_nodes,
2291                                              log_nodes);
2292           if (BE (*err != REG_NOERROR, 0))
2293             return NULL;
2294         }
2295       else
2296         next_nodes = *log_nodes;
2297       /* Note: We already add the nodes of the initial state,
2298          then we don't need to add them here.  */
2299
2300       context = re_string_context_at (&mctx->input,
2301                                       re_string_cur_idx (&mctx->input) - 1,
2302                                       mctx->eflags);
2303       next_state = mctx->state_log[cur_idx]
2304         = re_acquire_state_context (err, dfa, &next_nodes, context);
2305       /* We don't need to check errors here, since the return value of
2306          this function is next_state and ERR is already set.  */
2307
2308       if (table_nodes != NULL)
2309         re_node_set_free (&next_nodes);
2310     }
2311
2312   if (BE (dfa->nbackref, 0) && next_state != NULL)
2313     {
2314       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2315          later.  We must check them here, since the back references in the
2316          next state might use them.  */
2317       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2318                                         cur_idx);
2319       if (BE (*err != REG_NOERROR, 0))
2320         return NULL;
2321
2322       /* If the next state has back references.  */
2323       if (next_state->has_backref)
2324         {
2325           *err = transit_state_bkref (mctx, &next_state->nodes);
2326           if (BE (*err != REG_NOERROR, 0))
2327             return NULL;
2328           next_state = mctx->state_log[cur_idx];
2329         }
2330     }
2331
2332   return next_state;
2333 }
2334
2335 /* Skip bytes in the input that correspond to part of a
2336    multi-byte match, then look in the log for a state
2337    from which to restart matching.  */
2338 re_dfastate_t *
2339 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2340 {
2341   re_dfastate_t *cur_state;
2342   do
2343     {
2344       int max = mctx->state_log_top;
2345       int cur_str_idx = re_string_cur_idx (&mctx->input);
2346
2347       do
2348         {
2349           if (++cur_str_idx > max)
2350             return NULL;
2351           re_string_skip_bytes (&mctx->input, 1);
2352         }
2353       while (mctx->state_log[cur_str_idx] == NULL);
2354
2355       cur_state = merge_state_with_log (err, mctx, NULL);
2356     }
2357   while (*err == REG_NOERROR && cur_state == NULL);
2358   return cur_state;
2359 }
2360
2361 /* Helper functions for transit_state.  */
2362
2363 /* From the node set CUR_NODES, pick up the nodes whose types are
2364    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2365    expression. And register them to use them later for evaluating the
2366    correspoding back references.  */
2367
2368 static reg_errcode_t
2369 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2370                            int str_idx)
2371 {
2372   const re_dfa_t *const dfa = mctx->dfa;
2373   int node_idx;
2374   reg_errcode_t err;
2375
2376   /* TODO: This isn't efficient.
2377            Because there might be more than one nodes whose types are
2378            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2379            nodes.
2380            E.g. RE: (a){2}  */
2381   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2382     {
2383       int node = cur_nodes->elems[node_idx];
2384       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2385           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2386           && (dfa->used_bkref_map
2387               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2388         {
2389           err = match_ctx_add_subtop (mctx, node, str_idx);
2390           if (BE (err != REG_NOERROR, 0))
2391             return err;
2392         }
2393     }
2394   return REG_NOERROR;
2395 }
2396
#if 0
/* NOTE: everything up to the matching #endif is compiled out.  */

/* Return the next state to which the current state STATE will transit by
   accepting the current input byte.

   The successor closures of all nodes of STATE which accept the input at
   the current index are merged, a state is acquired for the merged set,
   and the input position is advanced by one byte.
   On allocation or merge failure NULL is returned and *ERR holds the
   error; otherwise *ERR is whatever re_acquire_state_context() left.  */

static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
                  re_dfastate_t *state)
{
  const re_dfa_t *const dfa = mctx->dfa;
  re_node_set next_nodes;
  re_dfastate_t *next_state;
  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
  unsigned int context;

  /* Scratch set which collects the destination nodes.  */
  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
  if (BE (*err != REG_NOERROR, 0))
    return NULL;
  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
    {
      int cur_node = state->nodes.elems[node_cnt];
      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
        {
          /* The node accepts the input: add the epsilon closure of its
             successor.  */
          *err = re_node_set_merge (&next_nodes,
                                    dfa->eclosures + dfa->nexts[cur_node]);
          if (BE (*err != REG_NOERROR, 0))
            {
              re_node_set_free (&next_nodes);
              return NULL;
            }
        }
    }
  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
  /* We don't need to check errors here, since the return value of
     this function is next_state and ERR is already set.  */

  re_node_set_free (&next_nodes);
  /* Consume the byte that was just matched.  */
  re_string_skip_bytes (&mctx->input, 1);
  return next_state;
}
#endif
2438
#ifdef RE_ENABLE_I18N
/* For every node of PSTATE which is flagged ACCEPT_MB and accepts a
   non-zero number of bytes at the current input position, merge the
   epsilon closure of the node's successor into the state log entry that
   lies that many bytes ahead.
   Return REG_NOERROR on success, otherwise the error reported by a
   callee.  */

static reg_errcode_t
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
  const re_dfa_t *const dfa = mctx->dfa;
  reg_errcode_t err;
  int pos;

  for (pos = 0; pos < pstate->nodes.nelem; ++pos)
    {
      re_node_set merged, *closure;
      re_dfastate_t *old_dest;
      unsigned int context;
      int nbytes, target_idx;
      int cur_node_idx = pstate->nodes.elems[pos];

      if (!dfa->nodes[cur_node_idx].accept_mb)
        continue;

      /* A constrained node is only usable if the context at the current
         position satisfies the constraint.  */
      if (dfa->nodes[cur_node_idx].constraint)
        {
          context = re_string_context_at (&mctx->input,
                                          re_string_cur_idx (&mctx->input),
                                          mctx->eflags);
          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
                                           context))
            continue;
        }

      /* Number of bytes this node accepts here; zero means no match.  */
      nbytes = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
                                        re_string_cur_idx (&mctx->input));
      if (nbytes == 0)
        continue;

      /* The node consumes NBYTES bytes; remember the longest such element
         seen so far.  */
      target_idx = re_string_cur_idx (&mctx->input) + nbytes;
      if (mctx->max_mb_elem_len < nbytes)
        mctx->max_mb_elem_len = nbytes;
      err = clean_state_log_if_needed (mctx, target_idx);
      if (BE (err != REG_NOERROR, 0))
        return err;
#ifdef DEBUG
      assert (dfa->nexts[cur_node_idx] != -1);
#endif
      closure = dfa->eclosures + dfa->nexts[cur_node_idx];

      /* If a state is already logged at the target, unite its entrance
         nodes with CLOSURE; otherwise use CLOSURE itself (shallow copy,
         which therefore must not be freed below).  */
      old_dest = mctx->state_log[target_idx];
      if (old_dest != NULL)
        {
          err = re_node_set_init_union (&merged,
                                        old_dest->entrance_nodes, closure);
          if (BE (err != REG_NOERROR, 0))
            return err;
        }
      else
        merged = *closure;

      context = re_string_context_at (&mctx->input, target_idx - 1,
                                      mctx->eflags);
      mctx->state_log[target_idx]
        = re_acquire_state_context (&err, dfa, &merged, context);
      if (old_dest != NULL)
        re_node_set_free (&merged);
      if (BE (mctx->state_log[target_idx] == NULL && err != REG_NOERROR, 0))
        return err;
    }
  return REG_NOERROR;
}
#endif /* RE_ENABLE_I18N */
2508
2509 static reg_errcode_t
2510 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2511 {
2512   const re_dfa_t *const dfa = mctx->dfa;
2513   reg_errcode_t err;
2514   int i;
2515   int cur_str_idx = re_string_cur_idx (&mctx->input);
2516
2517   for (i = 0; i < nodes->nelem; ++i)
2518     {
2519       int dest_str_idx, prev_nelem, bkc_idx;
2520       int node_idx = nodes->elems[i];
2521       unsigned int context;
2522       const re_token_t *node = dfa->nodes + node_idx;
2523       re_node_set *new_dest_nodes;
2524
2525       /* Check whether `node' is a backreference or not.  */
2526       if (node->type != OP_BACK_REF)
2527         continue;
2528
2529       if (node->constraint)
2530         {
2531           context = re_string_context_at (&mctx->input, cur_str_idx,
2532                                           mctx->eflags);
2533           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2534             continue;
2535         }
2536
2537       /* `node' is a backreference.
2538          Check the substring which the substring matched.  */
2539       bkc_idx = mctx->nbkref_ents;
2540       err = get_subexp (mctx, node_idx, cur_str_idx);
2541       if (BE (err != REG_NOERROR, 0))
2542         goto free_return;
2543
2544       /* And add the epsilon closures (which is `new_dest_nodes') of
2545          the backreference to appropriate state_log.  */
2546 #ifdef DEBUG
2547       assert (dfa->nexts[node_idx] != -1);
2548 #endif
2549       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2550         {
2551           int subexp_len;
2552           re_dfastate_t *dest_state;
2553           struct re_backref_cache_entry *bkref_ent;
2554           bkref_ent = mctx->bkref_ents + bkc_idx;
2555           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2556             continue;
2557           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2558           new_dest_nodes = (subexp_len == 0
2559                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2560                             : dfa->eclosures + dfa->nexts[node_idx]);
2561           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2562                           - bkref_ent->subexp_from);
2563           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2564                                           mctx->eflags);
2565           dest_state = mctx->state_log[dest_str_idx];
2566           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2567                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2568           /* Add `new_dest_node' to state_log.  */
2569           if (dest_state == NULL)
2570             {
2571               mctx->state_log[dest_str_idx]
2572                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2573                                             context);
2574               if (BE (mctx->state_log[dest_str_idx] == NULL
2575                       && err != REG_NOERROR, 0))
2576                 goto free_return;
2577             }
2578           else
2579             {
2580               re_node_set dest_nodes;
2581               err = re_node_set_init_union (&dest_nodes,
2582                                             dest_state->entrance_nodes,
2583                                             new_dest_nodes);
2584               if (BE (err != REG_NOERROR, 0))
2585                 {
2586                   re_node_set_free (&dest_nodes);
2587                   goto free_return;
2588                 }
2589               mctx->state_log[dest_str_idx]
2590                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2591               re_node_set_free (&dest_nodes);
2592               if (BE (mctx->state_log[dest_str_idx] == NULL
2593                       && err != REG_NOERROR, 0))
2594                 goto free_return;
2595             }
2596           /* We need to check recursively if the backreference can epsilon
2597              transit.  */
2598           if (subexp_len == 0
2599               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2600             {
2601               err = check_subexp_matching_top (mctx, new_dest_nodes,
2602                                                cur_str_idx);
2603               if (BE (err != REG_NOERROR, 0))
2604                 goto free_return;
2605               err = transit_state_bkref (mctx, new_dest_nodes);
2606               if (BE (err != REG_NOERROR, 0))
2607                 goto free_return;
2608             }
2609         }
2610     }
2611   err = REG_NOERROR;
2612  free_return:
2613   return err;
2614 }
2615
2616 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2617    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2618    Note that we might collect inappropriate candidates here.
2619    However, the cost of checking them strictly here is too high, then we
2620    delay these checking for prune_impossible_nodes().  */
2621
2622 static reg_errcode_t
2623 __attribute_warn_unused_result__
2624 get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2625 {
2626   const re_dfa_t *const dfa = mctx->dfa;
2627   int subexp_num, sub_top_idx;
2628   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2629   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2630   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2631   if (cache_idx != -1)
2632     {
2633       const struct re_backref_cache_entry *entry
2634         = mctx->bkref_ents + cache_idx;
2635       do
2636         if (entry->node == bkref_node)
2637           return REG_NOERROR; /* We already checked it.  */
2638       while (entry++->more);
2639     }
2640
2641   subexp_num = dfa->nodes[bkref_node].opr.idx;
2642
2643   /* For each sub expression  */
2644   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2645     {
2646       reg_errcode_t err;
2647       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2648       re_sub_match_last_t *sub_last;
2649       int sub_last_idx, sl_str, bkref_str_off;
2650
2651       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2652         continue; /* It isn't related.  */
2653
2654       sl_str = sub_top->str_idx;
2655       bkref_str_off = bkref_str_idx;
2656       /* At first, check the last node of sub expressions we already
2657          evaluated.  */
2658       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2659         {
2660           int sl_str_diff;
2661           sub_last = sub_top->lasts[sub_last_idx];
2662           sl_str_diff = sub_last->str_idx - sl_str;
2663           /* The matched string by the sub expression match with the substring
2664              at the back reference?  */
2665           if (sl_str_diff > 0)
2666             {
2667               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2668                 {
2669                   /* Not enough chars for a successful match.  */
2670                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2671                     break;
2672
2673                   err = clean_state_log_if_needed (mctx,
2674                                                    bkref_str_off
2675                                                    + sl_str_diff);
2676                   if (BE (err != REG_NOERROR, 0))
2677                     return err;
2678                   buf = (const char *) re_string_get_buffer (&mctx->input);
2679                 }
2680               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2681                 /* We don't need to search this sub expression any more.  */
2682                 break;
2683             }
2684           bkref_str_off += sl_str_diff;
2685           sl_str += sl_str_diff;
2686           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2687                                 bkref_str_idx);
2688
2689           /* Reload buf, since the preceding call might have reallocated
2690              the buffer.  */
2691           buf = (const char *) re_string_get_buffer (&mctx->input);
2692
2693           if (err == REG_NOMATCH)
2694             continue;
2695           if (BE (err != REG_NOERROR, 0))
2696             return err;
2697         }
2698
2699       if (sub_last_idx < sub_top->nlasts)
2700         continue;
2701       if (sub_last_idx > 0)
2702         ++sl_str;
2703       /* Then, search for the other last nodes of the sub expression.  */
2704       for (; sl_str <= bkref_str_idx; ++sl_str)
2705         {
2706           int cls_node, sl_str_off;
2707           const re_node_set *nodes;
2708           sl_str_off = sl_str - sub_top->str_idx;
2709           /* The matched string by the sub expression match with the substring
2710              at the back reference?  */
2711           if (sl_str_off > 0)
2712             {
2713               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2714                 {
2715                   /* If we are at the end of the input, we cannot match.  */
2716                   if (bkref_str_off >= mctx->input.len)
2717                     break;
2718
2719                   err = extend_buffers (mctx, bkref_str_off + 1);
2720                   if (BE (err != REG_NOERROR, 0))
2721                     return err;
2722
2723                   buf = (const char *) re_string_get_buffer (&mctx->input);
2724                 }
2725               if (buf [bkref_str_off++] != buf[sl_str - 1])
2726                 break; /* We don't need to search this sub expression
2727                           any more.  */
2728             }
2729           if (mctx->state_log[sl_str] == NULL)
2730             continue;
2731           /* Does this state have a ')' of the sub expression?  */
2732           nodes = &mctx->state_log[sl_str]->nodes;
2733           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2734                                        OP_CLOSE_SUBEXP);
2735           if (cls_node == -1)
2736             continue; /* No.  */
2737           if (sub_top->path == NULL)
2738             {
2739               sub_top->path = calloc (sizeof (state_array_t),
2740                                       sl_str - sub_top->str_idx + 1);
2741               if (sub_top->path == NULL)
2742                 return REG_ESPACE;
2743             }
2744           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2745              in the current context?  */
2746           err = check_arrival (mctx, sub_top->path, sub_top->node,
2747                                sub_top->str_idx, cls_node, sl_str,
2748                                OP_CLOSE_SUBEXP);
2749           if (err == REG_NOMATCH)
2750               continue;
2751           if (BE (err != REG_NOERROR, 0))
2752               return err;
2753           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2754           if (BE (sub_last == NULL, 0))
2755             return REG_ESPACE;
2756           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2757                                 bkref_str_idx);
2758           if (err == REG_NOMATCH)
2759             continue;
2760         }
2761     }
2762   return REG_NOERROR;
2763 }
2764
2765 /* Helper functions for get_subexp().  */
2766
2767 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2768    If it can arrive, register the sub expression expressed with SUB_TOP
2769    and SUB_LAST.  */
2770
2771 static reg_errcode_t
2772 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2773                 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2774 {
2775   reg_errcode_t err;
2776   int to_idx;
2777   /* Can the subexpression arrive the back reference?  */
2778   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2779                        sub_last->str_idx, bkref_node, bkref_str,
2780                        OP_OPEN_SUBEXP);
2781   if (err != REG_NOERROR)
2782     return err;
2783   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2784                              sub_last->str_idx);
2785   if (BE (err != REG_NOERROR, 0))
2786     return err;
2787   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2788   return clean_state_log_if_needed (mctx, to_idx);
2789 }
2790
2791 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2792    Search '(' if FL_OPEN, or search ')' otherwise.
2793    TODO: This function isn't efficient...
2794          Because there might be more than one nodes whose types are
2795          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2796          nodes.
2797          E.g. RE: (a){2}  */
2798
2799 static int
2800 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2801                   int subexp_idx, int type)
2802 {
2803   int cls_idx;
2804   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2805     {
2806       int cls_node = nodes->elems[cls_idx];
2807       const re_token_t *node = dfa->nodes + cls_node;
2808       if (node->type == type
2809           && node->opr.idx == subexp_idx)
2810         return cls_node;
2811     }
2812   return -1;
2813 }
2814
2815 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2816    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2817    heavily reused.
2818    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2819
2820 static reg_errcode_t
2821 __attribute_warn_unused_result__
2822 check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2823                int top_str, int last_node, int last_str, int type)
2824 {
2825   const re_dfa_t *const dfa = mctx->dfa;
2826   reg_errcode_t err = REG_NOERROR;
2827   int subexp_num, backup_cur_idx, str_idx, null_cnt;
2828   re_dfastate_t *cur_state = NULL;
2829   re_node_set *cur_nodes, next_nodes;
2830   re_dfastate_t **backup_state_log;
2831   unsigned int context;
2832
2833   subexp_num = dfa->nodes[top_node].opr.idx;
2834   /* Extend the buffer if we need.  */
2835   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2836     {
2837       re_dfastate_t **new_array;
2838       int old_alloc = path->alloc;
2839       path->alloc += last_str + mctx->max_mb_elem_len + 1;
2840       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2841       if (BE (new_array == NULL, 0))
2842         {
2843           path->alloc = old_alloc;
2844           return REG_ESPACE;
2845         }
2846       path->array = new_array;
2847       memset (new_array + old_alloc, '\0',
2848               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2849     }
2850
2851   str_idx = path->next_idx ?: top_str;
2852
2853   /* Temporary modify MCTX.  */
2854   backup_state_log = mctx->state_log;
2855   backup_cur_idx = mctx->input.cur_idx;
2856   mctx->state_log = path->array;
2857   mctx->input.cur_idx = str_idx;
2858
2859   /* Setup initial node set.  */
2860   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2861   if (str_idx == top_str)
2862     {
2863       err = re_node_set_init_1 (&next_nodes, top_node);
2864       if (BE (err != REG_NOERROR, 0))
2865         return err;
2866       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2867       if (BE (err != REG_NOERROR, 0))
2868         {
2869           re_node_set_free (&next_nodes);
2870           return err;
2871         }
2872     }
2873   else
2874     {
2875       cur_state = mctx->state_log[str_idx];
2876       if (cur_state && cur_state->has_backref)
2877         {
2878           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2879           if (BE (err != REG_NOERROR, 0))
2880             return err;
2881         }
2882       else
2883         re_node_set_init_empty (&next_nodes);
2884     }
2885   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2886     {
2887       if (next_nodes.nelem)
2888         {
2889           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2890                                     subexp_num, type);
2891           if (BE (err != REG_NOERROR, 0))
2892             {
2893               re_node_set_free (&next_nodes);
2894               return err;
2895             }
2896         }
2897       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2898       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2899         {
2900           re_node_set_free (&next_nodes);
2901           return err;
2902         }
2903       mctx->state_log[str_idx] = cur_state;
2904     }
2905
2906   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2907     {
2908       re_node_set_empty (&next_nodes);
2909       if (mctx->state_log[str_idx + 1])
2910         {
2911           err = re_node_set_merge (&next_nodes,
2912                                    &mctx->state_log[str_idx + 1]->nodes);
2913           if (BE (err != REG_NOERROR, 0))
2914             {
2915               re_node_set_free (&next_nodes);
2916               return err;
2917             }
2918         }
2919       if (cur_state)
2920         {
2921           err = check_arrival_add_next_nodes (mctx, str_idx,
2922                                               &cur_state->non_eps_nodes,
2923                                               &next_nodes);
2924           if (BE (err != REG_NOERROR, 0))
2925             {
2926               re_node_set_free (&next_nodes);
2927               return err;
2928             }
2929         }
2930       ++str_idx;
2931       if (next_nodes.nelem)
2932         {
2933           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2934           if (BE (err != REG_NOERROR, 0))
2935             {
2936               re_node_set_free (&next_nodes);
2937               return err;
2938             }
2939           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2940                                     subexp_num, type);
2941           if (BE (err != REG_NOERROR, 0))
2942             {
2943               re_node_set_free (&next_nodes);
2944               return err;
2945             }
2946         }
2947       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2948       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2949       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2950         {
2951           re_node_set_free (&next_nodes);
2952           return err;
2953         }
2954       mctx->state_log[str_idx] = cur_state;
2955       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2956     }
2957   re_node_set_free (&next_nodes);
2958   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2959                : &mctx->state_log[last_str]->nodes);
2960   path->next_idx = str_idx;
2961
2962   /* Fix MCTX.  */
2963   mctx->state_log = backup_state_log;
2964   mctx->input.cur_idx = backup_cur_idx;
2965
2966   /* Then check the current node set has the node LAST_NODE.  */
2967   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2968     return REG_NOERROR;
2969
2970   return REG_NOMATCH;
2971 }
2972
2973 /* Helper functions for check_arrival.  */
2974
2975 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2976    to NEXT_NODES.
2977    TODO: This function is similar to the functions transit_state*(),
2978          however this function has many additional works.
2979          Can't we unify them?  */
2980
2981 static reg_errcode_t
2982 __attribute_warn_unused_result__
2983 check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
2984                               re_node_set *cur_nodes, re_node_set *next_nodes)
2985 {
2986   const re_dfa_t *const dfa = mctx->dfa;
2987   int result;
2988   int cur_idx;
2989   reg_errcode_t err = REG_NOERROR;
2990   re_node_set union_set;
2991   re_node_set_init_empty (&union_set);
2992   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2993     {
2994       int naccepted = 0;
2995       int cur_node = cur_nodes->elems[cur_idx];
2996 #ifdef DEBUG
2997       re_token_type_t type = dfa->nodes[cur_node].type;
2998       assert (!IS_EPSILON_NODE (type));
2999 #endif
3000 #ifdef RE_ENABLE_I18N
3001       /* If the node may accept `multi byte'.  */
3002       if (dfa->nodes[cur_node].accept_mb)
3003         {
3004           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3005                                                str_idx);
3006           if (naccepted > 1)
3007             {
3008               re_dfastate_t *dest_state;
3009               int next_node = dfa->nexts[cur_node];
3010               int next_idx = str_idx + naccepted;
3011               dest_state = mctx->state_log[next_idx];
3012               re_node_set_empty (&union_set);
3013               if (dest_state)
3014                 {
3015                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3016                   if (BE (err != REG_NOERROR, 0))
3017                     {
3018                       re_node_set_free (&union_set);
3019                       return err;
3020                     }
3021                 }
3022               result = re_node_set_insert (&union_set, next_node);
3023               if (BE (result < 0, 0))
3024                 {
3025                   re_node_set_free (&union_set);
3026                   return REG_ESPACE;
3027                 }
3028               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3029                                                             &union_set);
3030               if (BE (mctx->state_log[next_idx] == NULL
3031                       && err != REG_NOERROR, 0))
3032                 {
3033                   re_node_set_free (&union_set);
3034                   return err;
3035                 }
3036             }
3037         }
3038 #endif /* RE_ENABLE_I18N */
3039       if (naccepted
3040           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3041         {
3042           result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3043           if (BE (result < 0, 0))
3044             {
3045               re_node_set_free (&union_set);
3046               return REG_ESPACE;
3047             }
3048         }
3049     }
3050   re_node_set_free (&union_set);
3051   return REG_NOERROR;
3052 }
3053
3054 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3055    CUR_NODES, however exclude the nodes which are:
3056     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3057     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3058 */
3059
3060 static reg_errcode_t
3061 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3062                           int ex_subexp, int type)
3063 {
3064   reg_errcode_t err;
3065   int idx, outside_node;
3066   re_node_set new_nodes;
3067 #ifdef DEBUG
3068   assert (cur_nodes->nelem);
3069 #endif
3070   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3071   if (BE (err != REG_NOERROR, 0))
3072     return err;
3073   /* Create a new node set NEW_NODES with the nodes which are epsilon
3074      closures of the node in CUR_NODES.  */
3075
3076   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3077     {
3078       int cur_node = cur_nodes->elems[idx];
3079       const re_node_set *eclosure = dfa->eclosures + cur_node;
3080       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3081       if (outside_node == -1)
3082         {
3083           /* There are no problematic nodes, just merge them.  */
3084           err = re_node_set_merge (&new_nodes, eclosure);
3085           if (BE (err != REG_NOERROR, 0))
3086             {
3087               re_node_set_free (&new_nodes);
3088               return err;
3089             }
3090         }
3091       else
3092         {
3093           /* There are problematic nodes, re-calculate incrementally.  */
3094           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3095                                               ex_subexp, type);
3096           if (BE (err != REG_NOERROR, 0))
3097             {
3098               re_node_set_free (&new_nodes);
3099               return err;
3100             }
3101         }
3102     }
3103   re_node_set_free (cur_nodes);
3104   *cur_nodes = new_nodes;
3105   return REG_NOERROR;
3106 }
3107
3108 /* Helper function for check_arrival_expand_ecl.
3109    Check incrementally the epsilon closure of TARGET, and if it isn't
3110    problematic append it to DST_NODES.  */
3111
3112 static reg_errcode_t
3113 __attribute_warn_unused_result__
3114 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3115                               int target, int ex_subexp, int type)
3116 {
3117   int cur_node;
3118   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3119     {
3120       int err;
3121
3122       if (dfa->nodes[cur_node].type == type
3123           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3124         {
3125           if (type == OP_CLOSE_SUBEXP)
3126             {
3127               err = re_node_set_insert (dst_nodes, cur_node);
3128               if (BE (err == -1, 0))
3129                 return REG_ESPACE;
3130             }
3131           break;
3132         }
3133       err = re_node_set_insert (dst_nodes, cur_node);
3134       if (BE (err == -1, 0))
3135         return REG_ESPACE;
3136       if (dfa->edests[cur_node].nelem == 0)
3137         break;
3138       if (dfa->edests[cur_node].nelem == 2)
3139         {
3140           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3141                                               dfa->edests[cur_node].elems[1],
3142                                               ex_subexp, type);
3143           if (BE (err != REG_NOERROR, 0))
3144             return err;
3145         }
3146       cur_node = dfa->edests[cur_node].elems[0];
3147     }
3148   return REG_NOERROR;
3149 }
3150
3151
/* For all the back references in the current state, calculate the
   destination of the back references by the appropriate entry
   in MCTX->BKREF_ENTS.

   CUR_NODES is the node set at string index CUR_STR; it is extended in
   place when a back reference matched the empty string.  SUBEXP_NUM and
   TYPE are only forwarded to check_arrival_expand_ecl().
   Return REG_NOERROR on success (also when search_cur_bkref_entry()
   returns -1 for CUR_STR), otherwise REG_ESPACE or the error reported by
   a callee.  */

static reg_errcode_t
__attribute_warn_unused_result__
expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
                    int cur_str, int subexp_num, int type)
{
  const re_dfa_t *const dfa = mctx->dfa;
  reg_errcode_t err;
  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
  struct re_backref_cache_entry *ent;

  if (cache_idx_start == -1)
    return REG_NOERROR;

  /* The scan starts over from the first entry whenever CUR_NODES grew.  */
 restart:
  ent = mctx->bkref_ents + cache_idx_start;
  do
    {
      int to_idx, next_node;

      /* Note: each `continue' below jumps to the loop condition, so ENT
         still advances to the following entry.  */

      /* Is this entry ENT is appropriate?  */
      if (!re_node_set_contains (cur_nodes, ent->node))
        continue; /* No.  */

      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
      /* Calculate the destination of the back reference, and append it
         to MCTX->STATE_LOG.  */
      if (to_idx == cur_str)
        {
          /* The backreference did epsilon transit, we must re-check all the
             node in the current state.  */
          re_node_set new_dests;
          reg_errcode_t err2, err3;
          next_node = dfa->edests[ent->node].elems[0];
          /* Already expanded; this is what ends the restart cycle.  */
          if (re_node_set_contains (cur_nodes, next_node))
            continue;
          /* All three steps run unconditionally; their results are
             checked together after NEW_DESTS has been freed.  */
          err = re_node_set_init_1 (&new_dests, next_node);
          err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
          err3 = re_node_set_merge (cur_nodes, &new_dests);
          re_node_set_free (&new_dests);
          if (BE (err != REG_NOERROR || err2 != REG_NOERROR
                  || err3 != REG_NOERROR, 0))
            {
              /* Report the first failure.  */
              err = (err != REG_NOERROR ? err
                     : (err2 != REG_NOERROR ? err2 : err3));
              return err;
            }
          /* TODO: It is still inefficient...  */
          goto restart;
        }
      else
        {
          /* The back reference consumed input: add its successor to the
             state logged at TO_IDX.  */
          re_node_set union_set;
          next_node = dfa->nexts[ent->node];
          if (mctx->state_log[to_idx])
            {
              int ret;
              /* Nothing to do if the successor is already logged.  */
              if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
                                        next_node))
                continue;
              err = re_node_set_init_copy (&union_set,
                                           &mctx->state_log[to_idx]->nodes);
              ret = re_node_set_insert (&union_set, next_node);
              if (BE (err != REG_NOERROR || ret < 0, 0))
                {
                  re_node_set_free (&union_set);
                  err = err != REG_NOERROR ? err : REG_ESPACE;
                  return err;
                }
            }
          else
            {
              err = re_node_set_init_1 (&union_set, next_node);
              if (BE (err != REG_NOERROR, 0))
                return err;
            }
          mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
          re_node_set_free (&union_set);
          if (BE (mctx->state_log[to_idx] == NULL
                  && err != REG_NOERROR, 0))
            return err;
        }
    }
  /* MORE is tested on the current entry before ENT is advanced.  */
  while (ent++->more);
  return REG_NOERROR;
}
3241
3242 /* Build transition table for the state.
3243    Return 1 if succeeded, otherwise return NULL.  */
3244
3245 static int
3246 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3247 {
3248   reg_errcode_t err;
3249   int i, j, ch, need_word_trtable = 0;
3250   bitset_word_t elem, mask;
3251   bool dests_node_malloced = false;
3252   bool dest_states_malloced = false;
3253   int ndests; /* Number of the destination states from `state'.  */
3254   re_dfastate_t **trtable;
3255   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3256   re_node_set follows, *dests_node;
3257   bitset_t *dests_ch;
3258   bitset_t acceptable;
3259
3260   struct dests_alloc
3261   {
3262     re_node_set dests_node[SBC_MAX];
3263     bitset_t dests_ch[SBC_MAX];
3264   } *dests_alloc;
3265
3266   /* We build DFA states which corresponds to the destination nodes
3267      from `state'.  `dests_node[i]' represents the nodes which i-th
3268      destination state contains, and `dests_ch[i]' represents the
3269      characters which i-th destination state accepts.  */
3270   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3271     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3272   else
3273     {
3274       dests_alloc = re_malloc (struct dests_alloc, 1);
3275       if (BE (dests_alloc == NULL, 0))
3276         return 0;
3277       dests_node_malloced = true;
3278     }
3279   dests_node = dests_alloc->dests_node;
3280   dests_ch = dests_alloc->dests_ch;
3281
3282   /* Initialize transiton table.  */
3283   state->word_trtable = state->trtable = NULL;
3284
3285   /* At first, group all nodes belonging to `state' into several
3286      destinations.  */
3287   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3288   if (BE (ndests <= 0, 0))
3289     {
3290       if (dests_node_malloced)
3291         free (dests_alloc);
3292       /* Return 0 in case of an error, 1 otherwise.  */
3293       if (ndests == 0)
3294         {
3295           state->trtable = (re_dfastate_t **)
3296             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3297           if (BE (state->trtable == NULL, 0))
3298             return 0;
3299           return 1;
3300         }
3301       return 0;
3302     }
3303
3304   err = re_node_set_alloc (&follows, ndests + 1);
3305   if (BE (err != REG_NOERROR, 0))
3306     goto out_free;
3307
3308   /* Avoid arithmetic overflow in size calculation.  */
3309   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3310             / (3 * sizeof (re_dfastate_t *)))
3311            < ndests),
3312           0))
3313     goto out_free;
3314
3315   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3316                          + ndests * 3 * sizeof (re_dfastate_t *)))
3317     dest_states = (re_dfastate_t **)
3318       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3319   else
3320     {
3321       dest_states = (re_dfastate_t **)
3322         malloc (ndests * 3 * sizeof (re_dfastate_t *));
3323       if (BE (dest_states == NULL, 0))
3324         {
3325 out_free:
3326           if (dest_states_malloced)
3327             free (dest_states);
3328           re_node_set_free (&follows);
3329           for (i = 0; i < ndests; ++i)
3330             re_node_set_free (dests_node + i);
3331           if (dests_node_malloced)
3332             free (dests_alloc);
3333           return 0;
3334         }
3335       dest_states_malloced = true;
3336     }
3337   dest_states_word = dest_states + ndests;
3338   dest_states_nl = dest_states_word + ndests;
3339   bitset_empty (acceptable);
3340
3341   /* Then build the states for all destinations.  */
3342   for (i = 0; i < ndests; ++i)
3343     {
3344       int next_node;
3345       re_node_set_empty (&follows);
3346       /* Merge the follows of this destination states.  */
3347       for (j = 0; j < dests_node[i].nelem; ++j)
3348         {
3349           next_node = dfa->nexts[dests_node[i].elems[j]];
3350           if (next_node != -1)
3351             {
3352               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3353               if (BE (err != REG_NOERROR, 0))
3354                 goto out_free;
3355             }
3356         }
3357       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3358       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3359         goto out_free;
3360       /* If the new state has context constraint,
3361          build appropriate states for these contexts.  */
3362       if (dest_states[i]->has_constraint)
3363         {
3364           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3365                                                           CONTEXT_WORD);
3366           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3367             goto out_free;
3368
3369           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3370             need_word_trtable = 1;
3371
3372           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3373                                                         CONTEXT_NEWLINE);
3374           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3375             goto out_free;
3376         }
3377       else
3378         {
3379           dest_states_word[i] = dest_states[i];
3380           dest_states_nl[i] = dest_states[i];
3381         }
3382       bitset_merge (acceptable, dests_ch[i]);
3383     }
3384
3385   if (!BE (need_word_trtable, 0))
3386     {
3387       /* We don't care about whether the following character is a word
3388          character, or we are in a single-byte character set so we can
3389          discern by looking at the character code: allocate a
3390          256-entry transition table.  */
3391       trtable = state->trtable =
3392         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3393       if (BE (trtable == NULL, 0))
3394         goto out_free;
3395
3396       /* For all characters ch...:  */
3397       for (i = 0; i < BITSET_WORDS; ++i)
3398         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3399              elem;
3400              mask <<= 1, elem >>= 1, ++ch)
3401           if (BE (elem & 1, 0))
3402             {
3403               /* There must be exactly one destination which accepts
3404                  character ch.  See group_nodes_into_DFAstates.  */
3405               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3406                 ;
3407
3408               /* j-th destination accepts the word character ch.  */
3409               if (dfa->word_char[i] & mask)
3410                 trtable[ch] = dest_states_word[j];
3411               else
3412                 trtable[ch] = dest_states[j];
3413             }
3414     }
3415   else
3416     {
3417       /* We care about whether the following character is a word
3418          character, and we are in a multi-byte character set: discern
3419          by looking at the character code: build two 256-entry
3420          transition tables, one starting at trtable[0] and one
3421          starting at trtable[SBC_MAX].  */
3422       trtable = state->word_trtable =
3423         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3424       if (BE (trtable == NULL, 0))
3425         goto out_free;
3426
3427       /* For all characters ch...:  */
3428       for (i = 0; i < BITSET_WORDS; ++i)
3429         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3430              elem;
3431              mask <<= 1, elem >>= 1, ++ch)
3432           if (BE (elem & 1, 0))
3433             {
3434               /* There must be exactly one destination which accepts
3435                  character ch.  See group_nodes_into_DFAstates.  */
3436               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3437                 ;
3438
3439               /* j-th destination accepts the word character ch.  */
3440               trtable[ch] = dest_states[j];
3441               trtable[ch + SBC_MAX] = dest_states_word[j];
3442             }
3443     }
3444
3445   /* new line */
3446   if (bitset_contain (acceptable, NEWLINE_CHAR))
3447     {
3448       /* The current state accepts newline character.  */
3449       for (j = 0; j < ndests; ++j)
3450         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3451           {
3452             /* k-th destination accepts newline character.  */
3453             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3454             if (need_word_trtable)
3455               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3456             /* There must be only one destination which accepts
3457                newline.  See group_nodes_into_DFAstates.  */
3458             break;
3459           }
3460     }
3461
3462   if (dest_states_malloced)
3463     free (dest_states);
3464
3465   re_node_set_free (&follows);
3466   for (i = 0; i < ndests; ++i)
3467     re_node_set_free (dests_node + i);
3468
3469   if (dests_node_malloced)
3470     free (dests_alloc);
3471
3472   return 1;
3473 }
3474
3475 /* Group all nodes belonging to STATE into several destinations.
3476    Then for all destinations, set the nodes belonging to the destination
3477    to DESTS_NODE[i] and set the characters accepted by the destination
3478    to DEST_CH[i].  This function return the number of destinations.  */
3479
3480 static int
3481 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3482                             re_node_set *dests_node, bitset_t *dests_ch)
3483 {
3484   reg_errcode_t err;
3485   int result;
3486   int i, j, k;
3487   int ndests; /* Number of the destinations from `state'.  */
3488   bitset_t accepts; /* Characters a node can accept.  */
3489   const re_node_set *cur_nodes = &state->nodes;
3490   bitset_empty (accepts);
3491   ndests = 0;
3492
3493   /* For all the nodes belonging to `state',  */
3494   for (i = 0; i < cur_nodes->nelem; ++i)
3495     {
3496       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3497       re_token_type_t type = node->type;
3498       unsigned int constraint = node->constraint;
3499
3500       /* Enumerate all single byte character this node can accept.  */
3501       if (type == CHARACTER)
3502         bitset_set (accepts, node->opr.c);
3503       else if (type == SIMPLE_BRACKET)
3504         {
3505           bitset_merge (accepts, node->opr.sbcset);
3506         }
3507       else if (type == OP_PERIOD)
3508         {
3509 #ifdef RE_ENABLE_I18N
3510           if (dfa->mb_cur_max > 1)
3511             bitset_merge (accepts, dfa->sb_char);
3512           else
3513 #endif
3514             bitset_set_all (accepts);
3515           if (!(dfa->syntax & RE_DOT_NEWLINE))
3516             bitset_clear (accepts, '\n');
3517           if (dfa->syntax & RE_DOT_NOT_NULL)
3518             bitset_clear (accepts, '\0');
3519         }
3520 #ifdef RE_ENABLE_I18N
3521       else if (type == OP_UTF8_PERIOD)
3522         {
3523           memset (accepts, '\xff', sizeof (bitset_t) / 2);
3524           if (!(dfa->syntax & RE_DOT_NEWLINE))
3525             bitset_clear (accepts, '\n');
3526           if (dfa->syntax & RE_DOT_NOT_NULL)
3527             bitset_clear (accepts, '\0');
3528         }
3529 #endif
3530       else
3531         continue;
3532
3533       /* Check the `accepts' and sift the characters which are not
3534          match it the context.  */
3535       if (constraint)
3536         {
3537           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3538             {
3539               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3540               bitset_empty (accepts);
3541               if (accepts_newline)
3542                 bitset_set (accepts, NEWLINE_CHAR);
3543               else
3544                 continue;
3545             }
3546           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3547             {
3548               bitset_empty (accepts);
3549               continue;
3550             }
3551
3552           if (constraint & NEXT_WORD_CONSTRAINT)
3553             {
3554               bitset_word_t any_set = 0;
3555               if (type == CHARACTER && !node->word_char)
3556                 {
3557                   bitset_empty (accepts);
3558                   continue;
3559                 }
3560 #ifdef RE_ENABLE_I18N
3561               if (dfa->mb_cur_max > 1)
3562                 for (j = 0; j < BITSET_WORDS; ++j)
3563                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3564               else
3565 #endif
3566                 for (j = 0; j < BITSET_WORDS; ++j)
3567                   any_set |= (accepts[j] &= dfa->word_char[j]);
3568               if (!any_set)
3569                 continue;
3570             }
3571           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3572             {
3573               bitset_word_t any_set = 0;
3574               if (type == CHARACTER && node->word_char)
3575                 {
3576                   bitset_empty (accepts);
3577                   continue;
3578                 }
3579 #ifdef RE_ENABLE_I18N
3580               if (dfa->mb_cur_max > 1)
3581                 for (j = 0; j < BITSET_WORDS; ++j)
3582                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3583               else
3584 #endif
3585                 for (j = 0; j < BITSET_WORDS; ++j)
3586                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3587               if (!any_set)
3588                 continue;
3589             }
3590         }
3591
3592       /* Then divide `accepts' into DFA states, or create a new
3593          state.  Above, we make sure that accepts is not empty.  */
3594       for (j = 0; j < ndests; ++j)
3595         {
3596           bitset_t intersec; /* Intersection sets, see below.  */
3597           bitset_t remains;
3598           /* Flags, see below.  */
3599           bitset_word_t has_intersec, not_subset, not_consumed;
3600
3601           /* Optimization, skip if this state doesn't accept the character.  */
3602           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3603             continue;
3604
3605           /* Enumerate the intersection set of this state and `accepts'.  */
3606           has_intersec = 0;
3607           for (k = 0; k < BITSET_WORDS; ++k)
3608             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3609           /* And skip if the intersection set is empty.  */
3610           if (!has_intersec)
3611             continue;
3612
3613           /* Then check if this state is a subset of `accepts'.  */
3614           not_subset = not_consumed = 0;
3615           for (k = 0; k < BITSET_WORDS; ++k)
3616             {
3617               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3618               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3619             }
3620
3621           /* If this state isn't a subset of `accepts', create a
3622              new group state, which has the `remains'. */
3623           if (not_subset)
3624             {
3625               bitset_copy (dests_ch[ndests], remains);
3626               bitset_copy (dests_ch[j], intersec);
3627               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3628               if (BE (err != REG_NOERROR, 0))
3629                 goto error_return;
3630               ++ndests;
3631             }
3632
3633           /* Put the position in the current group. */
3634           result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3635           if (BE (result < 0, 0))
3636             goto error_return;
3637
3638           /* If all characters are consumed, go to next node. */
3639           if (!not_consumed)
3640             break;
3641         }
3642       /* Some characters remain, create a new group. */
3643       if (j == ndests)
3644         {
3645           bitset_copy (dests_ch[ndests], accepts);
3646           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3647           if (BE (err != REG_NOERROR, 0))
3648             goto error_return;
3649           ++ndests;
3650           bitset_empty (accepts);
3651         }
3652     }
3653   return ndests;
3654  error_return:
3655   for (j = 0; j < ndests; ++j)
3656     re_node_set_free (dests_node + j);
3657   return -1;
3658 }
3659
3660 #ifdef RE_ENABLE_I18N
3661 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3662    Return the number of the bytes the node accepts.
3663    STR_IDX is the current index of the input string.
3664
3665    This function handles the nodes which can accept one character, or
3666    one collating element like '.', '[a-z]', opposite to the other nodes
3667    can only accept one byte.  */
3668
3669 # ifdef _LIBC
3670 #  include <locale/weight.h>
3671 # endif
3672
3673 static int
3674 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3675                          const re_string_t *input, int str_idx)
3676 {
3677   const re_token_t *node = dfa->nodes + node_idx;
3678   int char_len, elem_len;
3679   int i;
3680
3681   if (BE (node->type == OP_UTF8_PERIOD, 0))
3682     {
3683       unsigned char c = re_string_byte_at (input, str_idx), d;
3684       if (BE (c < 0xc2, 1))
3685         return 0;
3686
3687       if (str_idx + 2 > input->len)
3688         return 0;
3689
3690       d = re_string_byte_at (input, str_idx + 1);
3691       if (c < 0xe0)
3692         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3693       else if (c < 0xf0)
3694         {
3695           char_len = 3;
3696           if (c == 0xe0 && d < 0xa0)
3697             return 0;
3698         }
3699       else if (c < 0xf8)
3700         {
3701           char_len = 4;
3702           if (c == 0xf0 && d < 0x90)
3703             return 0;
3704         }
3705       else if (c < 0xfc)
3706         {
3707           char_len = 5;
3708           if (c == 0xf8 && d < 0x88)
3709             return 0;
3710         }
3711       else if (c < 0xfe)
3712         {
3713           char_len = 6;
3714           if (c == 0xfc && d < 0x84)
3715             return 0;
3716         }
3717       else
3718         return 0;
3719
3720       if (str_idx + char_len > input->len)
3721         return 0;
3722
3723       for (i = 1; i < char_len; ++i)
3724         {
3725           d = re_string_byte_at (input, str_idx + i);
3726           if (d < 0x80 || d > 0xbf)
3727             return 0;
3728         }
3729       return char_len;
3730     }
3731
3732   char_len = re_string_char_size_at (input, str_idx);
3733   if (node->type == OP_PERIOD)
3734     {
3735       if (char_len <= 1)
3736         return 0;
3737       /* FIXME: I don't think this if is needed, as both '\n'
3738          and '\0' are char_len == 1.  */
3739       /* '.' accepts any one character except the following two cases.  */
3740       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3741            re_string_byte_at (input, str_idx) == '\n') ||
3742           ((dfa->syntax & RE_DOT_NOT_NULL) &&
3743            re_string_byte_at (input, str_idx) == '\0'))
3744         return 0;
3745       return char_len;
3746     }
3747
3748   elem_len = re_string_elem_size_at (input, str_idx);
3749   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3750     return 0;
3751
3752   if (node->type == COMPLEX_BRACKET)
3753     {
3754       const re_charset_t *cset = node->opr.mbcset;
3755 # ifdef _LIBC
3756       const unsigned char *pin
3757         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3758       int j;
3759       uint32_t nrules;
3760 # endif /* _LIBC */
3761       int match_len = 0;
3762       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3763                     ? re_string_wchar_at (input, str_idx) : 0);
3764
3765       /* match with multibyte character?  */
3766       for (i = 0; i < cset->nmbchars; ++i)
3767         if (wc == cset->mbchars[i])
3768           {
3769             match_len = char_len;
3770             goto check_node_accept_bytes_match;
3771           }
3772       /* match with character_class?  */
3773       for (i = 0; i < cset->nchar_classes; ++i)
3774         {
3775           wctype_t wt = cset->char_classes[i];
3776           if (__iswctype (wc, wt))
3777             {
3778               match_len = char_len;
3779               goto check_node_accept_bytes_match;
3780             }
3781         }
3782
3783 # ifdef _LIBC
3784       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3785       if (nrules != 0)
3786         {
3787           unsigned int in_collseq = 0;
3788           const int32_t *table, *indirect;
3789           const unsigned char *weights, *extra;
3790           const char *collseqwc;
3791
3792           /* match with collating_symbol?  */
3793           if (cset->ncoll_syms)
3794             extra = (const unsigned char *)
3795               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3796           for (i = 0; i < cset->ncoll_syms; ++i)
3797             {
3798               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3799               /* Compare the length of input collating element and
3800                  the length of current collating element.  */
3801               if (*coll_sym != elem_len)
3802                 continue;
3803               /* Compare each bytes.  */
3804               for (j = 0; j < *coll_sym; j++)
3805                 if (pin[j] != coll_sym[1 + j])
3806                   break;
3807               if (j == *coll_sym)
3808                 {
3809                   /* Match if every bytes is equal.  */
3810                   match_len = j;
3811                   goto check_node_accept_bytes_match;
3812                 }
3813             }
3814
3815           if (cset->nranges)
3816             {
3817               if (elem_len <= char_len)
3818                 {
3819                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3820                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3821                 }
3822               else
3823                 in_collseq = find_collation_sequence_value (pin, elem_len);
3824             }
3825           /* match with range expression?  */
3826           for (i = 0; i < cset->nranges; ++i)
3827             if (cset->range_starts[i] <= in_collseq
3828                 && in_collseq <= cset->range_ends[i])
3829               {
3830                 match_len = elem_len;
3831                 goto check_node_accept_bytes_match;
3832               }
3833
3834           /* match with equivalence_class?  */
3835           if (cset->nequiv_classes)
3836             {
3837               const unsigned char *cp = pin;
3838               table = (const int32_t *)
3839                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3840               weights = (const unsigned char *)
3841                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3842               extra = (const unsigned char *)
3843                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3844               indirect = (const int32_t *)
3845                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3846               int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3847               if (idx > 0)
3848                 for (i = 0; i < cset->nequiv_classes; ++i)
3849                   {
3850                     int32_t equiv_class_idx = cset->equiv_classes[i];
3851                     size_t weight_len = weights[idx & 0xffffff];
3852                     if (weight_len == weights[equiv_class_idx & 0xffffff]
3853                         && (idx >> 24) == (equiv_class_idx >> 24))
3854                       {
3855                         int cnt = 0;
3856
3857                         idx &= 0xffffff;
3858                         equiv_class_idx &= 0xffffff;
3859
3860                         while (cnt <= weight_len
3861                                && (weights[equiv_class_idx + 1 + cnt]
3862                                    == weights[idx + 1 + cnt]))
3863                           ++cnt;
3864                         if (cnt > weight_len)
3865                           {
3866                             match_len = elem_len;
3867                             goto check_node_accept_bytes_match;
3868                           }
3869                       }
3870                   }
3871             }
3872         }
3873       else
3874 # endif /* _LIBC */
3875         {
3876           /* match with range expression?  */
3877 #if __GNUC__ >= 2
3878           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3879 #else
3880           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3881           cmp_buf[2] = wc;
3882 #endif
3883           for (i = 0; i < cset->nranges; ++i)
3884             {
3885               cmp_buf[0] = cset->range_starts[i];
3886               cmp_buf[4] = cset->range_ends[i];
3887               if (__wcscoll (cmp_buf, cmp_buf + 2) <= 0
3888                   && __wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3889                 {
3890                   match_len = char_len;
3891                   goto check_node_accept_bytes_match;
3892                 }
3893             }
3894         }
3895     check_node_accept_bytes_match:
3896       if (!cset->non_match)
3897         return match_len;
3898       else
3899         {
3900           if (match_len > 0)
3901             return 0;
3902           else
3903             return (elem_len > char_len) ? elem_len : char_len;
3904         }
3905     }
3906   return 0;
3907 }
3908
# ifdef _LIBC
/* Return the collation sequence value for the MBS_LEN bytes at MBS.

   When the locale has no collation rules, only a single byte can be
   looked up (through the _NL_COLLATE_COLLSEQMB table).  Otherwise the
   _NL_COLLATE_SYMB_EXTRAMB table is scanned record by record for an
   entry whose byte sequence equals MBS.  UINT_MAX is returned when
   nothing matches.  */
static unsigned int
find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
{
  const uint32_t rule_count
    = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

  if (rule_count == 0)
    {
      const unsigned char *collseq;

      if (mbs_len != 1)
        return UINT_MAX;

      /* No valid character.  Match it as a single byte character.  */
      collseq = (const unsigned char *)
        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
      return collseq[mbs[0]];
    }

  {
    const unsigned char *extra = (const unsigned char *)
      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
    int32_t extrasize = (const unsigned char *)
      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
    int32_t pos = 0;

    while (pos < extrasize)
      {
        int matched = 0;
        int32_t elem_mbs_len;

        /* Step over the length-prefixed name of the collating
           element.  */
        pos = pos + extra[pos] + 1;

        /* Next comes the length-prefixed byte sequence of the element;
           compare it with MBS when the lengths agree.  */
        elem_mbs_len = extra[pos++];
        if (mbs_len == elem_mbs_len)
          {
            int k = 0;
            while (k < elem_mbs_len && extra[pos + k] == mbs[k])
              ++k;
            if (k == elem_mbs_len)
              matched = 1;
          }

        /* Step over the byte sequence and round up to a multiple
           of 4.  */
        pos += elem_mbs_len;
        pos = (pos + 3) & ~3;
        /* Step over one 32-bit word.  */
        pos += sizeof (uint32_t);
        /* Step over the wide character sequence: a 32-bit count
           followed by that many 32-bit words.  */
        pos = pos + sizeof (uint32_t) * (*(int32_t *) (extra + pos) + 1);
        /* The word reached now is the value reported for a match.  */
        if (matched)
          return *(uint32_t *) (extra + pos);
        /* Otherwise step over it and examine the next record.  */
        pos += sizeof (uint32_t);
      }
    return UINT_MAX;
  }
}
# endif /* _LIBC */
3967 #endif /* RE_ENABLE_I18N */
3968
3969 /* Check whether the node accepts the byte which is IDX-th
3970    byte of the INPUT.  */
3971
3972 static int
3973 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3974                    int idx)
3975 {
3976   unsigned char ch;
3977   ch = re_string_byte_at (&mctx->input, idx);
3978   switch (node->type)
3979     {
3980     case CHARACTER:
3981       if (node->opr.c != ch)
3982         return 0;
3983       break;
3984
3985     case SIMPLE_BRACKET:
3986       if (!bitset_contain (node->opr.sbcset, ch))
3987         return 0;
3988       break;
3989
3990 #ifdef RE_ENABLE_I18N
3991     case OP_UTF8_PERIOD:
3992       if (ch >= 0x80)
3993         return 0;
3994       /* FALLTHROUGH */
3995 #endif
3996     case OP_PERIOD:
3997       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
3998           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
3999         return 0;
4000       break;
4001
4002     default:
4003       return 0;
4004     }
4005
4006   if (node->constraint)
4007     {
4008       /* The node has constraints.  Check whether the current context
4009          satisfies the constraints.  */
4010       unsigned int context = re_string_context_at (&mctx->input, idx,
4011                                                    mctx->eflags);
4012       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4013         return 0;
4014     }
4015
4016   return 1;
4017 }
4018
4019 /* Extend the buffers, if the buffers have run out.  */
4020
4021 static reg_errcode_t
4022 __attribute_warn_unused_result__
4023 extend_buffers (re_match_context_t *mctx, int min_len)
4024 {
4025   reg_errcode_t ret;
4026   re_string_t *pstr = &mctx->input;
4027
4028   /* Avoid overflow.  */
4029   if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4030     return REG_ESPACE;
4031
4032   /* Double the lengthes of the buffers, but allocate at least MIN_LEN.  */
4033   ret = re_string_realloc_buffers (pstr,
4034                                    MAX (min_len,
4035                                         MIN (pstr->len, pstr->bufs_len * 2)));
4036   if (BE (ret != REG_NOERROR, 0))
4037     return ret;
4038
4039   if (mctx->state_log != NULL)
4040     {
4041       /* And double the length of state_log.  */
4042       /* XXX We have no indication of the size of this buffer.  If this
4043          allocation fail we have no indication that the state_log array
4044          does not have the right size.  */
4045       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4046                                               pstr->bufs_len + 1);
4047       if (BE (new_array == NULL, 0))
4048         return REG_ESPACE;
4049       mctx->state_log = new_array;
4050     }
4051
4052   /* Then reconstruct the buffers.  */
4053   if (pstr->icase)
4054     {
4055 #ifdef RE_ENABLE_I18N
4056       if (pstr->mb_cur_max > 1)
4057         {
4058           ret = build_wcs_upper_buffer (pstr);
4059           if (BE (ret != REG_NOERROR, 0))
4060             return ret;
4061         }
4062       else
4063 #endif /* RE_ENABLE_I18N  */
4064         build_upper_buffer (pstr);
4065     }
4066   else
4067     {
4068 #ifdef RE_ENABLE_I18N
4069       if (pstr->mb_cur_max > 1)
4070         build_wcs_buffer (pstr);
4071       else
4072 #endif /* RE_ENABLE_I18N  */
4073         {
4074           if (pstr->trans != NULL)
4075             re_string_translate_buffer (pstr);
4076         }
4077     }
4078   return REG_NOERROR;
4079 }
4080
4081 \f
4082 /* Functions for matching context.  */
4083
4084 /* Initialize MCTX.  */
4085
4086 static reg_errcode_t
4087 __attribute_warn_unused_result__
4088 match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4089 {
4090   mctx->eflags = eflags;
4091   mctx->match_last = -1;
4092   if (n > 0)
4093     {
4094       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4095       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4096       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4097         return REG_ESPACE;
4098     }
4099   /* Already zero-ed by the caller.
4100      else
4101        mctx->bkref_ents = NULL;
4102      mctx->nbkref_ents = 0;
4103      mctx->nsub_tops = 0;  */
4104   mctx->abkref_ents = n;
4105   mctx->max_mb_elem_len = 1;
4106   mctx->asub_tops = n;
4107   return REG_NOERROR;
4108 }
4109
4110 /* Clean the entries which depend on the current input in MCTX.
4111    This function must be invoked when the matcher changes the start index
4112    of the input, or changes the input string.  */
4113
4114 static void
4115 match_ctx_clean (re_match_context_t *mctx)
4116 {
4117   int st_idx;
4118   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4119     {
4120       int sl_idx;
4121       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4122       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4123         {
4124           re_sub_match_last_t *last = top->lasts[sl_idx];
4125           re_free (last->path.array);
4126           re_free (last);
4127         }
4128       re_free (top->lasts);
4129       if (top->path)
4130         {
4131           re_free (top->path->array);
4132           re_free (top->path);
4133         }
4134       free (top);
4135     }
4136
4137   mctx->nsub_tops = 0;
4138   mctx->nbkref_ents = 0;
4139 }
4140
4141 /* Free all the memory associated with MCTX.  */
4142
4143 static void
4144 match_ctx_free (re_match_context_t *mctx)
4145 {
4146   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4147   match_ctx_clean (mctx);
4148   re_free (mctx->sub_tops);
4149   re_free (mctx->bkref_ents);
4150 }
4151
4152 /* Add a new backreference entry to MCTX.
4153    Note that we assume that caller never call this function with duplicate
4154    entry, and call with STR_IDX which isn't smaller than any existing entry.
4155 */
4156
4157 static reg_errcode_t
4158 __attribute_warn_unused_result__
4159 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4160                      int to)
4161 {
4162   if (mctx->nbkref_ents >= mctx->abkref_ents)
4163     {
4164       struct re_backref_cache_entry* new_entry;
4165       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4166                               mctx->abkref_ents * 2);
4167       if (BE (new_entry == NULL, 0))
4168         {
4169           re_free (mctx->bkref_ents);
4170           return REG_ESPACE;
4171         }
4172       mctx->bkref_ents = new_entry;
4173       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4174               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4175       mctx->abkref_ents *= 2;
4176     }
4177   if (mctx->nbkref_ents > 0
4178       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4179     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4180
4181   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4182   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4183   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4184   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4185
4186   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4187      If bit N is clear, means that this entry won't epsilon-transition to
4188      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4189      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4190      such node.
4191
4192      A backreference does not epsilon-transition unless it is empty, so set
4193      to all zeros if FROM != TO.  */
4194   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4195     = (from == to ? ~0 : 0);
4196
4197   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4198   if (mctx->max_mb_elem_len < to - from)
4199     mctx->max_mb_elem_len = to - from;
4200   return REG_NOERROR;
4201 }
4202
4203 /* Search for the first entry which has the same str_idx, or -1 if none is
4204    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4205
4206 static int
4207 search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4208 {
4209   int left, right, mid, last;
4210   last = right = mctx->nbkref_ents;
4211   for (left = 0; left < right;)
4212     {
4213       mid = (left + right) / 2;
4214       if (mctx->bkref_ents[mid].str_idx < str_idx)
4215         left = mid + 1;
4216       else
4217         right = mid;
4218     }
4219   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4220     return left;
4221   else
4222     return -1;
4223 }
4224
4225 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4226    at STR_IDX.  */
4227
4228 static reg_errcode_t
4229 __attribute_warn_unused_result__
4230 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4231 {
4232 #ifdef DEBUG
4233   assert (mctx->sub_tops != NULL);
4234   assert (mctx->asub_tops > 0);
4235 #endif
4236   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4237     {
4238       int new_asub_tops = mctx->asub_tops * 2;
4239       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4240                                                    re_sub_match_top_t *,
4241                                                    new_asub_tops);
4242       if (BE (new_array == NULL, 0))
4243         return REG_ESPACE;
4244       mctx->sub_tops = new_array;
4245       mctx->asub_tops = new_asub_tops;
4246     }
4247   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4248   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4249     return REG_ESPACE;
4250   mctx->sub_tops[mctx->nsub_tops]->node = node;
4251   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4252   return REG_NOERROR;
4253 }
4254
4255 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4256    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4257
4258 static re_sub_match_last_t *
4259 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4260 {
4261   re_sub_match_last_t *new_entry;
4262   if (BE (subtop->nlasts == subtop->alasts, 0))
4263     {
4264       int new_alasts = 2 * subtop->alasts + 1;
4265       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4266                                                     re_sub_match_last_t *,
4267                                                     new_alasts);
4268       if (BE (new_array == NULL, 0))
4269         return NULL;
4270       subtop->lasts = new_array;
4271       subtop->alasts = new_alasts;
4272     }
4273   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4274   if (BE (new_entry != NULL, 1))
4275     {
4276       subtop->lasts[subtop->nlasts] = new_entry;
4277       new_entry->node = node;
4278       new_entry->str_idx = str_idx;
4279       ++subtop->nlasts;
4280     }
4281   return new_entry;
4282 }
4283
4284 static void
4285 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4286                re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4287 {
4288   sctx->sifted_states = sifted_sts;
4289   sctx->limited_states = limited_sts;
4290   sctx->last_node = last_node;
4291   sctx->last_str_idx = last_str_idx;
4292   re_node_set_init_empty (&sctx->limits);
4293 }