posix/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 #include <assert.h>
  22 #include <ctype.h>
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26
  27 #if defined HAVE_WCHAR_H || defined _LIBC
  28 # include <wchar.h>
  29 #endif /* HAVE_WCHAR_H || _LIBC */
  30 #if defined HAVE_WCTYPE_H || defined _LIBC
  31 # include <wctype.h>
  32 #endif /* HAVE_WCTYPE_H || _LIBC */
  33
  34 #ifdef _LIBC
  35 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
  36 #  define _RE_DEFINE_LOCALE_FUNCTIONS 1
  37 #  include <locale/localeinfo.h>
  38 #  include <locale/elem-hash.h>
  39 #  include <locale/coll-lookup.h>
  40 # endif
  41 #endif
  42
  43 #include "regex.h"
  44 #include "regex_internal.h"
  45
  46 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  47                                      re_string_t *input, int n);
  48 static void match_ctx_free (re_match_context_t *cache);
  49 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
  50                                           int str_idx, int from, int to);
  51 static void match_ctx_clear_flag (re_match_context_t *mctx);
  52 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  53                            re_dfastate_t **limited_sts, int last_node,
  54                            int last_str_idx, int check_subexp);
  55 static reg_errcode_t re_search_internal (const regex_t *preg,
  56                                          const char *string, int length,
  57                                          int start, int range, int stop,
  58                                          size_t nmatch, regmatch_t pmatch[],
  59                                          int eflags);
  60 static int re_search_2_stub (struct re_pattern_buffer *bufp,
  61                              const char *string1, int length1,
  62                              const char *string2, int length2,
  63                              int start, int range, struct re_registers *regs,
  64                              int stop, int ret_len);
  65 static int re_search_stub (struct re_pattern_buffer *bufp,
  66                            const char *string, int length, int start,
  67                            int range, int stop, struct re_registers *regs,
  68                            int ret_len);
  69 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  70                               int nregs, int regs_allocated);
  71 static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
  72                                                          const regex_t *preg,
  73                                                          const re_match_context_t *mctx,
  74                                                          int idx);
  75 static int check_matching (const regex_t *preg, re_match_context_t *mctx,
  76                            int fl_search, int fl_longest_match);
  77 static int check_halt_node_context (const re_dfa_t *dfa, int node,
  78                                     unsigned int context);
  79 static int check_halt_state_context (const regex_t *preg,
  80                                      const re_dfastate_t *state,
  81                                      const re_match_context_t *mctx, int idx);
  82 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
  83                          int cur_idx, int nmatch);
  84 static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
  85                               const re_match_context_t *mctx,
  86                               int *pidx, int node, re_node_set *eps_via_nodes,
  87                               struct re_fail_stack_t *fs);
  88 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  89                                       int str_idx, int *dests, int nregs,
  90                                       regmatch_t *regs,
  91                                       re_node_set *eps_via_nodes);
  92 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
  93                            regmatch_t *regs, re_node_set *eps_via_nodes);
  94 static reg_errcode_t set_regs (const regex_t *preg,
  95                                const re_match_context_t *mctx,
  96                                size_t nmatch, regmatch_t *pmatch,
  97                                int fl_backtrack);
  98 #ifdef RE_ENABLE_I18N
  99 static int sift_states_iter_mb (const regex_t *preg,
 100                                 const re_match_context_t *mctx,
 101                                 re_sift_context_t *sctx,
 102                                 int node_idx, int str_idx, int max_str_idx);
 103 #endif /* RE_ENABLE_I18N */
 104 static reg_errcode_t sift_states_backward (const regex_t *preg,
 105                                            re_match_context_t *mctx,
 106                                            re_sift_context_t *sctx);
 107 static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
 108                                               re_match_context_t *mctx,
 109                                               re_sift_context_t *sctx,
 110                                               int str_idx,
 111                                               re_node_set *dest_nodes);
 112 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
 113                                             re_node_set *dest_nodes,
 114                                             const re_node_set *candidates);
 115 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
 116                                             re_node_set *dest_nodes,
 117                                             const re_node_set *and_nodes);
 118 static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
 119                              re_match_context_t *mctx, int dst_node,
 120                              int dst_idx, int src_node, int src_idx);
 121 static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
 122                                       int limit, re_node_set *eclosures,
 123                                       int subexp_idx, int node, int str_idx);
 124 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
 125                                           re_node_set *dest_nodes,
 126                                           const re_node_set *candidates,
 127                                           re_node_set *limits,
 128                                           struct re_backref_cache_entry *bkref_ents,
 129                                           int str_idx);
 130 static reg_errcode_t search_subexp (const regex_t *preg,
 131                                     re_match_context_t *mctx,
 132                                     re_sift_context_t *sctx, int str_idx,
 133                                     re_node_set *dest_nodes);
 134 static reg_errcode_t sift_states_bkref (const regex_t *preg,
 135                                         re_match_context_t *mctx,
 136                                         re_sift_context_t *sctx,
 137                                         int str_idx, re_node_set *dest_nodes);
 138 static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
 139                                               int next_state_log_idx);
 140 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
 141                                         re_dfastate_t **src, int num);
 142 static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
 143                                      re_match_context_t *mctx,
 144                                      re_dfastate_t *state, int fl_search);
 145 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
 146                                         re_dfastate_t *pstate,
 147                                         int fl_search,
 148                                         re_match_context_t *mctx);
 149 #ifdef RE_ENABLE_I18N
 150 static reg_errcode_t transit_state_mb (const regex_t *preg,
 151                                        re_dfastate_t *pstate,
 152                                        re_match_context_t *mctx);
 153 #endif /* RE_ENABLE_I18N */
 154 static reg_errcode_t transit_state_bkref (const regex_t *preg,
 155                                           re_dfastate_t *pstate,
 156                                           re_match_context_t *mctx);
 157 static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
 158                                                re_node_set *nodes,
 159                                                re_dfastate_t **work_state_log,
 160                                                re_match_context_t *mctx);
 161 static re_dfastate_t **build_trtable (const regex_t *dfa,
 162                                       const re_dfastate_t *state,
 163                                       int fl_search);
 164 #ifdef RE_ENABLE_I18N
 165 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
 166                                     const re_string_t *input, int idx);
 167 # ifdef _LIBC
 168 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 169                                                    size_t name_len);
 170 # endif /* _LIBC */
 171 #endif /* RE_ENABLE_I18N */
 172 static int group_nodes_into_DFAstates (const regex_t *dfa,
 173                                        const re_dfastate_t *state,
 174                                        re_node_set *states_node,
 175                                        bitset *states_ch);
 176 static int check_node_accept (const regex_t *preg, const re_token_t *node,
 177                               const re_match_context_t *mctx, int idx);
 178 static reg_errcode_t extend_buffers (re_match_context_t *mctx);
 179 \f
 180 /* Entry point for POSIX code.  */
 181
 182 /* regexec searches for a given pattern, specified by PREG, in the
 183    string STRING.
 184
 185    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 186    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 187    least NMATCH elements, and we set them to the offsets of the
 188    corresponding matched substrings.
 189
 190    EFLAGS specifies `execution flags' which affect matching: if
 191    REG_NOTBOL is set, then ^ does not match at the beginning of the
 192    string; if REG_NOTEOL is set, then $ does not match at the end.
 193
 194    We return 0 if we find a match and REG_NOMATCH if not.  */
 195
 196 int
 197 regexec (preg, string, nmatch, pmatch, eflags)
 198     const regex_t *__restrict preg;
 199     const char *__restrict string;
 200     size_t nmatch;
 201     regmatch_t pmatch[];
 202     int eflags;
 203 {
 204   reg_errcode_t err;
 205   int length = strlen (string);
 206   if (preg->no_sub)
 207     err = re_search_internal (preg, string, length, 0, length, length, 0,
 208                               NULL, eflags);
 209   else
 210     err = re_search_internal (preg, string, length, 0, length, length, nmatch,
 211                               pmatch, eflags);
 212   return err != REG_NOERROR;
 213 }
 214 #ifdef _LIBC
 215 weak_alias (__regexec, regexec)
 216 #endif
 217
 218 /* Entry points for GNU code.  */
 219
 220 /* re_match, re_search, re_match_2, re_search_2
 221
 222    The former two functions operate on STRING with length LENGTH,
 223    while the later two operate on concatenation of STRING1 and STRING2
 224    with lengths LENGTH1 and LENGTH2, respectively.
 225
 226    re_match() matches the compiled pattern in BUFP against the string,
 227    starting at index START.
 228
 229    re_search() first tries matching at index START, then it tries to match
 230    starting from index START + 1, and so on.  The last start position tried
 231    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 232    way as re_match().)
 233
 234    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 235    the first STOP characters of the concatenation of the strings should be
 236    concerned.
 237
 238    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 239    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
 240    computed relative to the concatenation, not relative to the individual
 241    strings.)
 242
 243    On success, re_match* functions return the length of the match, re_search*
 244    return the position of the start of the match.  Return value -1 means no
 245    match was found and -2 indicates an internal error.  */
 246
 247 int
 248 re_match (bufp, string, length, start, regs)
 249     struct re_pattern_buffer *bufp;
 250     const char *string;
 251     int length, start;
 252     struct re_registers *regs;
 253 {
 254   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 255 }
 256 #ifdef _LIBC
 257 weak_alias (__re_match, re_match)
 258 #endif
 259
 260 int
 261 re_search (bufp, string, length, start, range, regs)
 262     struct re_pattern_buffer *bufp;
 263     const char *string;
 264     int length, start, range;
 265     struct re_registers *regs;
 266 {
 267   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
 268 }
 269 #ifdef _LIBC
 270 weak_alias (__re_search, re_search)
 271 #endif
 272
 273 int
 274 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 275     struct re_pattern_buffer *bufp;
 276     const char *string1, *string2;
 277     int length1, length2, start, stop;
 278     struct re_registers *regs;
 279 {
 280   return re_search_2_stub (bufp, string1, length1, string2, length2,
 281                            start, 0, regs, stop, 1);
 282 }
 283 #ifdef _LIBC
 284 weak_alias (__re_match_2, re_match_2)
 285 #endif
 286
 287 int
 288 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 289     struct re_pattern_buffer *bufp;
 290     const char *string1, *string2;
 291     int length1, length2, start, range, stop;
 292     struct re_registers *regs;
 293 {
 294   return re_search_2_stub (bufp, string1, length1, string2, length2,
 295                            start, range, regs, stop, 0);
 296 }
 297 #ifdef _LIBC
 298 weak_alias (__re_search_2, re_search_2)
 299 #endif
 300
 301 static int
 302 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
 303                   stop, ret_len)
 304     struct re_pattern_buffer *bufp;
 305     const char *string1, *string2;
 306     int length1, length2, start, range, stop, ret_len;
 307     struct re_registers *regs;
 308 {
 309   const char *str;
 310   int rval;
 311   int len = length1 + length2;
 312   int free_str = 0;
 313
 314   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
 315     return -2;
 316
 317   /* Concatenate the strings.  */
 318   if (length2 > 0)
 319     if (length1 > 0)
 320       {
 321         char *s = re_malloc (char, len);
 322
 323         if (BE (s == NULL, 0))
 324           return -2;
 325         memcpy (s, string1, length1);
 326         memcpy (s + length1, string2, length2);
 327         str = s;
 328         free_str = 1;
 329       }
 330     else
 331       str = string2;
 332   else
 333     str = string1;
 334
 335   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 336                          ret_len);
 337   if (free_str)
 338       re_free ((char *) str);
 339   return rval;
 340 }
 341
 342 /* The parameters have the same meaning as those of re_search.
 343    Additional parameters:
 344    If RET_LEN is nonzero the length of the match is returned (re_match style);
 345    otherwise the position of the match is returned.  */
 346
 347 static int
 348 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
 349     struct re_pattern_buffer *bufp;
 350     const char *string;
 351     int length, start, range, stop, ret_len;
 352     struct re_registers *regs;
 353 {
 354   reg_errcode_t result;
 355   regmatch_t *pmatch;
 356   int nregs, rval;
 357   int eflags = 0;
 358
 359   /* Check for out-of-range.  */
 360   if (BE (start < 0 || start > length || range < 0, 0))
 361     return -1;
 362   if (BE (start + range > length, 0))
 363     range = length - start;
 364
 365   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 366   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 367
 368   /* Compile fastmap if we haven't yet.  */
 369   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 370     re_compile_fastmap (bufp);
 371
 372   if (BE (bufp->no_sub, 0))
 373     regs = NULL;
 374
 375   /* We need at least 1 register.  */
 376   if (regs == NULL)
 377     nregs = 1;
 378   else if (BE (bufp->regs_allocated == REGS_FIXED &&
 379                regs->num_regs < bufp->re_nsub + 1, 0))
 380     {
 381       nregs = regs->num_regs;
 382       if (BE (nregs < 1, 0))
 383         {
 384           /* Nothing can be copied to regs.  */
 385           regs = NULL;
 386           nregs = 1;
 387         }
 388     }
 389   else
 390     nregs = bufp->re_nsub + 1;
 391   pmatch = re_malloc (regmatch_t, nregs);
 392   if (BE (pmatch == NULL, 0))
 393     return -2;
 394
 395   result = re_search_internal (bufp, string, length, start, range, stop,
 396                                nregs, pmatch, eflags);
 397
 398   rval = 0;
 399
 400   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 401   if (result != REG_NOERROR)
 402     rval = -1;
 403   else if (regs != NULL)
 404     {
 405       /* If caller wants register contents data back, copy them.  */
 406       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 407                                            bufp->regs_allocated);
 408       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 409         rval = -2;
 410     }
 411
 412   if (BE (rval == 0, 1))
 413     {
 414       if (ret_len)
 415         {
 416           assert (pmatch[0].rm_so == start);
 417           rval = pmatch[0].rm_eo - start;
 418         }
 419       else
 420         rval = pmatch[0].rm_so;
 421     }
 422   re_free (pmatch);
 423   return rval;
 424 }
 425
 426 static unsigned
 427 re_copy_regs (regs, pmatch, nregs, regs_allocated)
 428     struct re_registers *regs;
 429     regmatch_t *pmatch;
 430     int nregs, regs_allocated;
 431 {
 432   int rval = REGS_REALLOCATE;
 433   int i;
 434   int need_regs = nregs + 1;
 435   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 436      uses.  */
 437
 438   /* Have the register data arrays been allocated?  */
 439   if (regs_allocated == REGS_UNALLOCATED)
 440     { /* No.  So allocate them with malloc.  */
 441       regs->start = re_malloc (regoff_t, need_regs);
 442       if (BE (regs->start == NULL, 0))
 443         return REGS_UNALLOCATED;
 444       regs->end = re_malloc (regoff_t, need_regs);
 445       if (BE (regs->end == NULL, 0))
 446         {
 447           re_free (regs->start);
 448           return REGS_UNALLOCATED;
 449         }
 450       regs->num_regs = need_regs;
 451     }
 452   else if (regs_allocated == REGS_REALLOCATE)
 453     { /* Yes.  If we need more elements than were already
 454          allocated, reallocate them.  If we need fewer, just
 455          leave it alone.  */
 456       if (need_regs > regs->num_regs)
 457         {
 458           regs->start = re_realloc (regs->start, regoff_t, need_regs);
 459           if (BE (regs->start == NULL, 0))
 460             {
 461               if (regs->end != NULL)
 462                 re_free (regs->end);
 463               return REGS_UNALLOCATED;
 464             }
 465           regs->end = re_realloc (regs->end, regoff_t, need_regs);
 466           if (BE (regs->end == NULL, 0))
 467             {
 468               re_free (regs->start);
 469               return REGS_UNALLOCATED;
 470             }
 471           regs->num_regs = need_regs;
 472         }
 473     }
 474   else
 475     {
 476       assert (regs_allocated == REGS_FIXED);
 477       /* This function may not be called with REGS_FIXED and nregs too big.  */
 478       assert (regs->num_regs >= nregs);
 479       rval = REGS_FIXED;
 480     }
 481
 482   /* Copy the regs.  */
 483   for (i = 0; i < nregs; ++i)
 484     {
 485       regs->start[i] = pmatch[i].rm_so;
 486       regs->end[i] = pmatch[i].rm_eo;
 487     }
 488   for ( ; i < regs->num_regs; ++i)
 489     regs->start[i] = regs->end[i] = -1;
 490
 491   return rval;
 492 }
 493
 494 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 495    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 496    this memory for recording register information.  STARTS and ENDS
 497    must be allocated using the malloc library routine, and must each
 498    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 499
 500    If NUM_REGS == 0, then subsequent matches should allocate their own
 501    register data.
 502
 503    Unless this function is called, the first search or match using
 504    PATTERN_BUFFER will allocate its own register data, without
 505    freeing the old data.  */
 506
 507 void
 508 re_set_registers (bufp, regs, num_regs, starts, ends)
 509     struct re_pattern_buffer *bufp;
 510     struct re_registers *regs;
 511     unsigned num_regs;
 512     regoff_t *starts, *ends;
 513 {
 514   if (num_regs)
 515     {
 516       bufp->regs_allocated = REGS_REALLOCATE;
 517       regs->num_regs = num_regs;
 518       regs->start = starts;
 519       regs->end = ends;
 520     }
 521   else
 522     {
 523       bufp->regs_allocated = REGS_UNALLOCATED;
 524       regs->num_regs = 0;
 525       regs->start = regs->end = (regoff_t *) 0;
 526     }
 527 }
 528 #ifdef _LIBC
 529 weak_alias (__re_set_registers, re_set_registers)
 530 #endif
 531 \f
 532 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 533    them unless specifically requested.  */
 534
 535 #if defined _REGEX_RE_COMP || defined _LIBC
 536 int
 537 # ifdef _LIBC
 538 weak_function
 539 # endif
 540 re_exec (s)
 541      const char *s;
 542 {
 543   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 544 }
 545 #endif /* _REGEX_RE_COMP */
 546 \f
 547 static re_node_set empty_set;
 548
 549 /* Internal entry point.  */
 550
 551 /* Searches for a compiled pattern PREG in the string STRING, whose
 552    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 553    mingings with regexec.  START, and RANGE have the same meanings
 554    with re_search.
 555    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 556    otherwise return the error code.
 557    Note: We assume front end functions already check ranges.
 558    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 559
 560 static reg_errcode_t
 561 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
 562                     eflags)
 563     const regex_t *preg;
 564     const char *string;
 565     int length, start, range, stop, eflags;
 566     size_t nmatch;
 567     regmatch_t pmatch[];
 568 {
 569   reg_errcode_t err;
 570   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 571   re_string_t input;
 572   int left_lim, right_lim, incr;
 573   int fl_longest_match, match_first, match_last = -1;
 574   re_match_context_t mctx;
 575   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate)
 576                    ? preg->fastmap : NULL);
 577
 578   /* Check if the DFA haven't been compiled.  */
 579   if (BE (preg->used == 0 || dfa->init_state == NULL
 580           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 581           || dfa->init_state_begbuf == NULL, 0))
 582     return REG_NOMATCH;
 583
 584   re_node_set_init_empty (&empty_set);
 585
 586   /* We must check the longest matching, if nmatch > 0.  */
 587   fl_longest_match = (nmatch != 0);
 588
 589   err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
 590                             preg->translate, preg->syntax & RE_ICASE);
 591   if (BE (err != REG_NOERROR, 0))
 592     return err;
 593   input.stop = stop;
 594
 595   err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
 596   if (BE (err != REG_NOERROR, 0))
 597     return err;
 598
 599   /* We will log all the DFA states through which the dfa pass,
 600      if nmatch > 1, or this dfa has "multibyte node", which is a
 601      back-reference or a node which can accept multibyte character or
 602      multi character collating element.  */
 603   if (nmatch > 1 || dfa->has_mb_node)
 604     {
 605       mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
 606       if (BE (mctx.state_log == NULL, 0))
 607         return REG_ESPACE;
 608     }
 609   else
 610     mctx.state_log = NULL;
 611
 612 #ifdef DEBUG
 613   /* We assume front-end functions already check them.  */
 614   assert (start + range >= 0 && start + range <= length);
 615 #endif
 616
 617   match_first = start;
 618   input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 619                        : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
 620
 621   /* Check incrementally whether of not the input string match.  */
 622   incr = (range < 0) ? -1 : 1;
 623   left_lim = (range < 0) ? start + range : start;
 624   right_lim = (range < 0) ? start : start + range;
 625
 626   for (;;)
 627     {
 628       /* At first get the current byte from input string.  */
 629       int ch;
 630       if (MB_CUR_MAX > 1 && (preg->syntax & RE_ICASE || preg->translate))
 631         {
 632           /* In this case, we can't determin easily the current byte,
 633              since it might be a component byte of a multibyte character.
 634              Then we use the constructed buffer instead.  */
 635           /* If MATCH_FIRST is out of the valid range, reconstruct the
 636              buffers.  */
 637           if (input.raw_mbs_idx + input.valid_len <= match_first)
 638             re_string_reconstruct (&input, match_first, eflags,
 639                                    preg->newline_anchor);
 640           /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 641              Note that MATCH_FIRST must not be smaller than 0.  */
 642           ch = ((match_first >= length) ? 0
 643                 : re_string_byte_at (&input, match_first - input.raw_mbs_idx));
 644         }
 645       else
 646         {
 647           /* We apply translate/conversion manually, since it is trivial
 648              in this case.  */
 649           /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 650              Note that MATCH_FIRST must not be smaller than 0.  */
 651           ch = (match_first < length) ? (unsigned char)string[match_first] : 0;
 652           /* Apply translation if we need.  */
 653           ch = preg->translate ? preg->translate[ch] : ch;
 654           /* In case of case insensitive mode, convert to upper case.  */
 655           ch = ((preg->syntax & RE_ICASE) && islower (ch)) ? toupper (ch) : ch;
 656         }
 657
 658       /* Eliminate inappropriate one by fastmap.  */
 659       if (preg->can_be_null || fastmap == NULL || fastmap[ch])
 660         {
 661           /* Reconstruct the buffers so that the matcher can assume that
 662              the matching starts from the begining of the buffer.  */
 663           re_string_reconstruct (&input, match_first, eflags,
 664                                  preg->newline_anchor);
 665 #ifdef RE_ENABLE_I18N
 666           /* Eliminate it when it is a component of a multibyte character
 667              and isn't the head of a multibyte character.  */
 668           if (MB_CUR_MAX == 1 || re_string_first_byte (&input, 0))
 669 #endif
 670             {
 671               /* It seems to be appropriate one, then use the matcher.  */
 672               /* We assume that the matching starts from 0.  */
 673               mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 674               match_last = check_matching (preg, &mctx, 0, fl_longest_match);
 675               if (match_last != -1)
 676                 {
 677                   if (BE (match_last == -2, 0))
 678                     return REG_ESPACE;
 679                   else
 680                     break; /* We found a matching.  */
 681                 }
 682             }
 683         }
 684       /* Update counter.  */
 685       match_first += incr;
 686       if (match_first < left_lim || right_lim < match_first)
 687         break;
 688     }
 689
 690   /* Set pmatch[] if we need.  */
 691   if (match_last != -1 && nmatch > 0)
 692     {
 693       int reg_idx;
 694
 695       /* Initialize registers.  */
 696       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 697         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 698
 699       /* Set the points where matching start/end.  */
 700       pmatch[0].rm_so = 0;
 701       mctx.match_last = pmatch[0].rm_eo = match_last;
 702
 703       if (!preg->no_sub && nmatch > 1)
 704         {
 705           /* We need the ranges of all the subexpressions.  */
 706           int halt_node;
 707           re_dfastate_t **sifted_states;
 708           re_dfastate_t **lim_states = NULL;
 709           re_dfastate_t *pstate = mctx.state_log[match_last];
 710           re_sift_context_t sctx;
 711 #ifdef DEBUG
 712           assert (mctx.state_log != NULL);
 713 #endif
 714           halt_node = check_halt_state_context (preg, pstate, &mctx,
 715                                                 match_last);
 716           if (dfa->has_plural_match)
 717             {
 718               match_ctx_clear_flag (&mctx);
 719               sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 720               if (BE (sifted_states == NULL, 0))
 721                 return REG_ESPACE;
 722               if (dfa->nbackref)
 723                 {
 724                   lim_states = calloc (sizeof (re_dfastate_t *),
 725                                        match_last + 1);
 726                   if (BE (lim_states == NULL, 0))
 727                     return REG_ESPACE;
 728                 }
 729               sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 730                              mctx.match_last, 0);
 731               err = sift_states_backward (preg, &mctx, &sctx);
 732               if (BE (err != REG_NOERROR, 0))
 733                 return err;
 734               if (lim_states != NULL)
 735                 {
 736                   err = merge_state_array (dfa, sifted_states, lim_states,
 737                                            match_last + 1);
 738                   if (BE (err != REG_NOERROR, 0))
 739                     return err;
 740                   re_free (lim_states);
 741                 }
 742               re_node_set_free (&sctx.limits);
 743               re_free (mctx.state_log);
 744               mctx.state_log = sifted_states;
 745             }
 746           mctx.last_node = halt_node;
 747           err = set_regs (preg, &mctx, nmatch, pmatch,
 748                           dfa->has_plural_match && dfa->nbackref > 0);
 749           if (BE (err != REG_NOERROR, 0))
 750             return err;
 751         }
 752
 753       /* At last, add the offset to the each registers, since we slided
 754          the buffers so that We can assume that the matching starts from 0.  */
 755       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 756         if (pmatch[reg_idx].rm_so != -1)
 757           {
 758             pmatch[reg_idx].rm_so += match_first;
 759             pmatch[reg_idx].rm_eo += match_first;
 760           }
 761     }
 762
 763   re_free (mctx.state_log);
 764   if (dfa->nbackref)
 765     match_ctx_free (&mctx);
 766   re_string_destruct (&input);
 767
 768   return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
 769 }
 770
 771 /* Acquire an initial state and return it.
 772    We must select appropriate initial state depending on the context,
 773    since initial states may have constraints like "\<", "^", etc..  */
 774
 775 static inline re_dfastate_t *
 776 acquire_init_state_context (err, preg, mctx, idx)
 777      reg_errcode_t *err;
 778      const regex_t *preg;
 779      const re_match_context_t *mctx;
 780      int idx;
 781 {
 782   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 783
 784   *err = REG_NOERROR;
 785   if (dfa->init_state->has_constraint)
 786     {
 787       unsigned int context;
 788       context =  re_string_context_at (mctx->input, idx - 1, mctx->eflags,
 789                                        preg->newline_anchor);
 790       if (IS_WORD_CONTEXT (context))
 791         return dfa->init_state_word;
 792       else if (IS_ORDINARY_CONTEXT (context))
 793         return dfa->init_state;
 794       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 795         return dfa->init_state_begbuf;
 796       else if (IS_NEWLINE_CONTEXT (context))
 797         return dfa->init_state_nl;
 798       else if (IS_BEGBUF_CONTEXT (context))
 799         {
 800           /* It is relatively rare case, then calculate on demand.  */
 801           return  re_acquire_state_context (err, dfa,
 802                                             dfa->init_state->entrance_nodes,
 803                                             context);
 804         }
 805       else
 806         /* Must not happen?  */
 807         return dfa->init_state;
 808     }
 809   else
 810     return dfa->init_state;
 811 }
 812
 813 /* Check whether the regular expression match input string INPUT or not,
 814    and return the index where the matching end, return -1 if not match,
 815    or return -2 in case of an error.
 816    FL_SEARCH means we must search where the matching starts,
 817    FL_LONGEST_MATCH means we want the POSIX longest matching.
 818    Note that the matcher assume that the maching starts from the current
 819    index of the buffer.  */
 820
 821 static int
 822 check_matching (preg, mctx, fl_search, fl_longest_match)
 823     const regex_t *preg;
 824     re_match_context_t *mctx;
 825     int fl_search, fl_longest_match;
 826 {
 827   reg_errcode_t err;
 828   int match = 0;
 829   int match_last = -1;
 830   int cur_str_idx = re_string_cur_idx (mctx->input);
 831   re_dfastate_t *cur_state;
 832
 833   cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
 834   /* An initial state must not be NULL(invalid state).  */
 835   if (BE (cur_state == NULL, 0))
 836     return -2;
 837   if (mctx->state_log != NULL)
 838     mctx->state_log[cur_str_idx] = cur_state;
 839
 840   if (cur_state->has_backref)
 841     {
 842       int i;
 843       re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 844       for (i = 0; i < cur_state->nodes.nelem; ++i)
 845         {
 846           re_token_type_t type;
 847           int node = cur_state->nodes.elems[i];
 848           int entity = (dfa->nodes[node].type != OP_CONTEXT_NODE ? node
 849                         : dfa->nodes[node].opr.ctx_info->entity);
 850           type = dfa->nodes[entity].type;
 851           if (type == OP_BACK_REF)
 852             {
 853               int clexp_idx;
 854               for (clexp_idx = 0; clexp_idx < cur_state->nodes.nelem;
 855                    ++clexp_idx)
 856                 {
 857                   re_token_t *clexp_node;
 858                   clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx];
 859                   if (clexp_node->type == OP_CLOSE_SUBEXP
 860                       && clexp_node->opr.idx + 1== dfa->nodes[entity].opr.idx)
 861                     {
 862                       err = match_ctx_add_entry (mctx, node, 0, 0, 0);
 863                       if (BE (err != REG_NOERROR, 0))
 864                         return -2;
 865                       break;
 866                     }
 867                 }
 868             }
 869         }
 870     }
 871
 872   /* If the RE accepts NULL string.  */
 873   if (cur_state->halt)
 874     {
 875       if (!cur_state->has_constraint
 876           || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
 877         {
 878           if (!fl_longest_match)
 879             return cur_str_idx;
 880           else
 881             {
 882               match_last = cur_str_idx;
 883               match = 1;
 884             }
 885         }
 886     }
 887
 888   while (!re_string_eoi (mctx->input))
 889     {
 890       cur_state = transit_state (&err, preg, mctx, cur_state,
 891                                  fl_search && !match);
 892       if (cur_state == NULL) /* Reached at the invalid state or an error.  */
 893         {
 894           cur_str_idx = re_string_cur_idx (mctx->input);
 895           if (BE (err != REG_NOERROR, 0))
 896             return -2;
 897           if (fl_search && !match)
 898             {
 899               /* Restart from initial state, since we are searching
 900                  the point from where matching start.  */
 901 #ifdef RE_ENABLE_I18N
 902               if (MB_CUR_MAX == 1
 903                   || re_string_first_byte (mctx->input, cur_str_idx))
 904 #endif /* RE_ENABLE_I18N */
 905                 cur_state = acquire_init_state_context (&err, preg, mctx,
 906                                                         cur_str_idx);
 907               if (BE (cur_state == NULL && err != REG_NOERROR, 0))
 908                 return -2;
 909               if (mctx->state_log != NULL)
 910                 mctx->state_log[cur_str_idx] = cur_state;
 911             }
 912           else if (!fl_longest_match && match)
 913             break;
 914           else /* (fl_longest_match && match) || (!fl_search && !match)  */
 915             {
 916               if (mctx->state_log == NULL)
 917                 break;
 918               else
 919                 {
 920                   int max = mctx->state_log_top;
 921                   for (; cur_str_idx <= max; ++cur_str_idx)
 922                     if (mctx->state_log[cur_str_idx] != NULL)
 923                       break;
 924                   if (cur_str_idx > max)
 925                     break;
 926                 }
 927             }
 928         }
 929
 930       if (cur_state != NULL && cur_state->halt)
 931         {
 932           /* Reached at a halt state.
 933              Check the halt state can satisfy the current context.  */
 934           if (!cur_state->has_constraint
 935               || check_halt_state_context (preg, cur_state, mctx,
 936                                            re_string_cur_idx (mctx->input)))
 937             {
 938               /* We found an appropriate halt state.  */
 939               match_last = re_string_cur_idx (mctx->input);
 940               match = 1;
 941               if (!fl_longest_match)
 942                 break;
 943             }
 944         }
 945    }
 946   return match_last;
 947 }
 948
 949 /* Check NODE match the current context.  */
 950
 951 static int check_halt_node_context (dfa, node, context)
 952     const re_dfa_t *dfa;
 953     int node;
 954     unsigned int context;
 955 {
 956   int entity;
 957   re_token_type_t type = dfa->nodes[node].type;
 958   if (type == END_OF_RE)
 959     return 1;
 960   if (type != OP_CONTEXT_NODE)
 961     return 0;
 962   entity = dfa->nodes[node].opr.ctx_info->entity;
 963   if (dfa->nodes[entity].type != END_OF_RE
 964       || NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, context))
 965     return 0;
 966   return 1;
 967 }
 968
 969 /* Check the halt state STATE match the current context.
 970    Return 0 if not match, if the node, STATE has, is a halt node and
 971    match the context, return the node.  */
 972
 973 static int
 974 check_halt_state_context (preg, state, mctx, idx)
 975     const regex_t *preg;
 976     const re_dfastate_t *state;
 977     const re_match_context_t *mctx;
 978     int idx;
 979 {
 980   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 981   int i;
 982   unsigned int context;
 983 #ifdef DEBUG
 984   assert (state->halt);
 985 #endif
 986   context = re_string_context_at (mctx->input, idx, mctx->eflags,
 987                                   preg->newline_anchor);
 988   for (i = 0; i < state->nodes.nelem; ++i)
 989     if (check_halt_node_context (dfa, state->nodes.elems[i], context))
 990       return state->nodes.elems[i];
 991   return 0;
 992 }
 993
 994 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
 995    corresponding to the DFA).
 996    Return the destination node, and update EPS_VIA_NODES, return -1 in case
 997    of errors.  */
 998
 999 static int
1000 proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
1001     const regex_t *preg;
1002     regmatch_t *regs;
1003     const re_match_context_t *mctx;
1004     int nregs, *pidx, node;
1005     re_node_set *eps_via_nodes;
1006     struct re_fail_stack_t *fs;
1007 {
1008   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1009   int i, err, dest_node, cur_entity;
1010   dest_node = -1;
1011   cur_entity = ((dfa->nodes[node].type == OP_CONTEXT_NODE)
1012                 ? dfa->nodes[node].opr.ctx_info->entity : node);
1013   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1014     {
1015       int ndest, dest_nodes[2], dest_entities[2];
1016       err = re_node_set_insert (eps_via_nodes, node);
1017       if (BE (err < 0, 0))
1018         return -1;
1019       /* Pick up valid destinations.  */
1020       for (ndest = 0, i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
1021         {
1022           int candidate = mctx->state_log[*pidx]->nodes.elems[i];
1023           int entity;
1024           entity = ((dfa->nodes[candidate].type == OP_CONTEXT_NODE)
1025                     ? dfa->nodes[candidate].opr.ctx_info->entity : candidate);
1026           if (!re_node_set_contains (dfa->edests + node, entity))
1027             continue;
1028           dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1029           dest_entities[0] = (ndest == 0) ? entity : dest_entities[0];
1030           dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1031           dest_entities[1] = (ndest == 1) ? entity : dest_entities[1];
1032           ++ndest;
1033         }
1034       if (ndest <= 1)
1035         return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1036       if (dest_entities[0] > dest_entities[1])
1037         {
1038           int swap_work = dest_nodes[0];
1039           dest_nodes[0] = dest_nodes[1];
1040           dest_nodes[1] = swap_work;
1041         }
1042       /* In order to avoid infinite loop like "(a*)*".  */
1043       if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1044         return dest_nodes[1];
1045       if (fs != NULL)
1046         push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
1047       return dest_nodes[0];
1048     }
1049   else
1050     {
1051       int naccepted = 0, entity = node;
1052       re_token_type_t type = dfa->nodes[node].type;
1053       if (type == OP_CONTEXT_NODE)
1054         {
1055           entity = dfa->nodes[node].opr.ctx_info->entity;
1056           type = dfa->nodes[entity].type;
1057         }
1058
1059 #ifdef RE_ENABLE_I18N
1060       if (ACCEPT_MB_NODE (type))
1061         naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx);
1062       else
1063 #endif /* RE_ENABLE_I18N */
1064       if (type == OP_BACK_REF)
1065         {
1066           int subexp_idx = dfa->nodes[entity].opr.idx;
1067           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1068           if (fs != NULL)
1069             {
1070               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1071                 return -1;
1072               else if (naccepted)
1073                 {
1074                   char *buf = re_string_get_buffer (mctx->input);
1075                   if (strncmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1076                                naccepted) != 0)
1077                     return -1;
1078                 }
1079             }
1080
1081           if (naccepted == 0)
1082             {
1083               err = re_node_set_insert (eps_via_nodes, node);
1084               if (BE (err < 0, 0))
1085                 return -2;
1086               dest_node = dfa->nexts[node];
1087               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1088                                         dest_node))
1089                 return dest_node;
1090               for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
1091                 {
1092                   dest_node = mctx->state_log[*pidx]->nodes.elems[i];
1093                   if ((dfa->nodes[dest_node].type == OP_CONTEXT_NODE
1094                        && (dfa->nexts[node]
1095                            == dfa->nodes[dest_node].opr.ctx_info->entity)))
1096                     return dest_node;
1097                 }
1098             }
1099         }
1100
1101       if (naccepted != 0
1102           || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
1103         {
1104           dest_node = dfa->nexts[node];
1105           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1106           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1107                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1108                                                dest_node)))
1109             return -1;
1110           re_node_set_empty (eps_via_nodes);
1111           return dest_node;
1112         }
1113     }
1114   return -1;
1115 }
1116
1117 static reg_errcode_t
1118 push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1119      struct re_fail_stack_t *fs;
1120      int str_idx, *dests, nregs;
1121      regmatch_t *regs;
1122      re_node_set *eps_via_nodes;
1123 {
1124   reg_errcode_t err;
1125   int num = fs->num++;
1126   if (fs->num == fs->alloc)
1127     {
1128       fs->alloc *= 2;
1129       fs->stack = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1130                                        * fs->alloc));
1131       if (fs->stack == NULL)
1132         return REG_ESPACE;
1133     }
1134   fs->stack[num].idx = str_idx;
1135   fs->stack[num].node = dests[1];
1136   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1137   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1138   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1139   return err;
1140 }
1141
1142 static int
1143 pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1144      struct re_fail_stack_t *fs;
1145      int *pidx, nregs;
1146      regmatch_t *regs;
1147      re_node_set *eps_via_nodes;
1148 {
1149   int num = --fs->num;
1150   assert (num >= 0);
1151  *pidx = fs->stack[num].idx;
1152   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1153   re_node_set_free (eps_via_nodes);
1154   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1155   return fs->stack[num].node;
1156 }
1157
1158 /* Set the positions where the subexpressions are starts/ends to registers
1159    PMATCH.
1160    Note: We assume that pmatch[0] is already set, and
1161    pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1).  */
1162
1163 static reg_errcode_t
1164 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1165      const regex_t *preg;
1166      const re_match_context_t *mctx;
1167      size_t nmatch;
1168      regmatch_t *pmatch;
1169      int fl_backtrack;
1170 {
1171   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1172   int idx, cur_node, real_nmatch;
1173   re_node_set eps_via_nodes;
1174   struct re_fail_stack_t *fs;
1175   struct re_fail_stack_t fs_body = {0, 2, NULL};
1176 #ifdef DEBUG
1177   assert (nmatch > 1);
1178   assert (mctx->state_log != NULL);
1179 #endif
1180   if (fl_backtrack)
1181     {
1182       fs = &fs_body;
1183       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1184     }
1185   else
1186     fs = NULL;
1187   cur_node = dfa->init_node;
1188   real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1189   re_node_set_init_empty (&eps_via_nodes);
1190   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1191     {
1192       update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
1193       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1194         {
1195           int reg_idx;
1196           if (fs)
1197             {
1198               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1199                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1200                   break;
1201               if (reg_idx == nmatch)
1202                 return REG_NOERROR;
1203               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1204                                          &eps_via_nodes);
1205             }
1206           else
1207             return REG_NOERROR;
1208         }
1209
1210       /* Proceed to next node.  */
1211       cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
1212                                     &eps_via_nodes, fs);
1213
1214       if (BE (cur_node < 0, 0))
1215         {
1216           if (cur_node == -2)
1217             return REG_ESPACE;
1218           if (fs)
1219             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1220                                        &eps_via_nodes);
1221           else
1222             return REG_NOMATCH;
1223         }
1224     }
1225   re_node_set_free (&eps_via_nodes);
1226   return REG_NOERROR;
1227 }
1228
1229 static void
1230 update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
1231      re_dfa_t *dfa;
1232      regmatch_t *pmatch;
1233      int cur_node, cur_idx, nmatch;
1234 {
1235   int type = dfa->nodes[cur_node].type;
1236   int reg_num;
1237   if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
1238     return;
1239   reg_num = dfa->nodes[cur_node].opr.idx + 1;
1240   if (reg_num >= nmatch)
1241     return;
1242   if (type == OP_OPEN_SUBEXP)
1243     {
1244       /* We are at the first node of this sub expression.  */
1245       pmatch[reg_num].rm_so = cur_idx;
1246       pmatch[reg_num].rm_eo = -1;
1247     }
1248   else if (type == OP_CLOSE_SUBEXP)
1249     /* We are at the first node of this sub expression.  */
1250     pmatch[reg_num].rm_eo = cur_idx;
1251 }
1252
1253 #define NUMBER_OF_STATE 1
1254
1255 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1256    and sift the nodes in each states according to the following rules.
1257    Updated state_log will be wrote to STATE_LOG.
1258
1259    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1260      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1261         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1262         the LAST_NODE, we throw away the node `a'.
1263      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1264         string `s' and transit to `b':
1265         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1266            away the node `a'.
1267         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1268             throwed away, we throw away the node `a'.
1269      3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
1270         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1271            node `a'.
1272         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
1273             we throw away the node `a'.  */
1274
1275 #define STATE_NODE_CONTAINS(state,node) \
1276   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1277
1278 static reg_errcode_t
1279 sift_states_backward (preg, mctx, sctx)
1280      const regex_t *preg;
1281      re_match_context_t *mctx;
1282      re_sift_context_t *sctx;
1283 {
1284   reg_errcode_t err;
1285   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1286   int null_cnt = 0;
1287   int str_idx = sctx->last_str_idx;
1288   re_node_set cur_dest;
1289   re_node_set *cur_src; /* Points the state_log[str_idx]->nodes  */
1290
1291 #ifdef DEBUG
1292   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1293 #endif
1294   cur_src = &mctx->state_log[str_idx]->nodes;
1295
1296   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1297      transit to the last_node and the last_node itself.  */
1298   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1299   if (BE (err != REG_NOERROR, 0))
1300     return err;
1301   err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1302   if (BE (err != REG_NOERROR, 0))
1303     return err;
1304
1305   /* Then check each states in the state_log.  */
1306   while (str_idx > 0)
1307     {
1308       int i, ret;
1309       /* Update counters.  */
1310       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1311       if (null_cnt > mctx->max_mb_elem_len)
1312         {
1313           memset (sctx->sifted_states, '\0',
1314                   sizeof (re_dfastate_t *) * str_idx);
1315           return REG_NOERROR;
1316         }
1317       re_node_set_empty (&cur_dest);
1318       --str_idx;
1319       cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1320                  : &mctx->state_log[str_idx]->nodes);
1321
1322       /* Then build the next sifted state.
1323          We build the next sifted state on `cur_dest', and update
1324          `sifted_states[str_idx]' with `cur_dest'.
1325          Note:
1326          `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1327          `cur_src' points the node_set of the old `state_log[str_idx]'.  */
1328       for (i = 0; i < cur_src->nelem; i++)
1329         {
1330           int prev_node = cur_src->elems[i];
1331           int entity = prev_node;
1332           int naccepted = 0;
1333           re_token_type_t type = dfa->nodes[prev_node].type;
1334
1335           if (IS_EPSILON_NODE(type))
1336             continue;
1337           if (type == OP_CONTEXT_NODE)
1338             {
1339               entity = dfa->nodes[prev_node].opr.ctx_info->entity;
1340               type = dfa->nodes[entity].type;
1341             }
1342 #ifdef RE_ENABLE_I18N
1343           /* If the node may accept `multi byte'.  */
1344           if (ACCEPT_MB_NODE (type))
1345             naccepted = sift_states_iter_mb (preg, mctx, sctx, entity, str_idx,
1346                                              sctx->last_str_idx);
1347
1348 #endif /* RE_ENABLE_I18N */
1349           /* We don't check backreferences here.
1350              See update_cur_sifted_state().  */
1351
1352           if (!naccepted
1353               && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1354                                     str_idx)
1355               && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1356                                       dfa->nexts[prev_node]))
1357             naccepted = 1;
1358
1359           if (naccepted == 0)
1360             continue;
1361
1362           if (sctx->limits.nelem)
1363             {
1364               int to_idx = str_idx + naccepted;
1365               if (check_dst_limits (dfa, &sctx->limits, mctx,
1366                                     dfa->nexts[prev_node], to_idx,
1367                                     prev_node, str_idx))
1368                 continue;
1369             }
1370           ret = re_node_set_insert (&cur_dest, prev_node);
1371           if (BE (ret == -1, 0))
1372             return err;
1373         }
1374
1375       /* Add all the nodes which satisfy the following conditions:
1376          - It can epsilon transit to a node in CUR_DEST.
1377          - It is in CUR_SRC.
1378          And update state_log.  */
1379       err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1380       if (BE (err != REG_NOERROR, 0))
1381         return err;
1382     }
1383
1384   re_node_set_free (&cur_dest);
1385   return REG_NOERROR;
1386 }
1387
1388 /* Helper functions.  */
1389
1390 static inline reg_errcode_t
1391 clean_state_log_if_need (mctx, next_state_log_idx)
1392     re_match_context_t *mctx;
1393     int next_state_log_idx;
1394 {
1395   int top = mctx->state_log_top;
1396
1397   if (next_state_log_idx >= mctx->input->bufs_len
1398       || (next_state_log_idx >= mctx->input->valid_len
1399           && mctx->input->valid_len < mctx->input->len))
1400     {
1401       reg_errcode_t err;
1402       err = extend_buffers (mctx);
1403       if (BE (err != REG_NOERROR, 0))
1404         return err;
1405     }
1406
1407   if (top < next_state_log_idx)
1408     {
1409       memset (mctx->state_log + top + 1, '\0',
1410               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1411       mctx->state_log_top = next_state_log_idx;
1412     }
1413   return REG_NOERROR;
1414 }
1415
1416 static reg_errcode_t merge_state_array (dfa, dst, src, num)
1417      re_dfa_t *dfa;
1418      re_dfastate_t **dst;
1419      re_dfastate_t **src;
1420      int num;
1421 {
1422   int st_idx;
1423   reg_errcode_t err;
1424   for (st_idx = 0; st_idx < num; ++st_idx)
1425     {
1426       if (dst[st_idx] == NULL)
1427         dst[st_idx] = src[st_idx];
1428       else if (src[st_idx] != NULL)
1429         {
1430           re_node_set merged_set;
1431           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1432                                         &src[st_idx]->nodes);
1433           if (BE (err != REG_NOERROR, 0))
1434             return err;
1435           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1436           if (BE (err != REG_NOERROR, 0))
1437             return err;
1438           re_node_set_free (&merged_set);
1439         }
1440     }
1441   return REG_NOERROR;
1442 }
1443
1444 static reg_errcode_t
1445 update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
1446      const regex_t *preg;
1447      re_match_context_t *mctx;
1448      re_sift_context_t *sctx;
1449      int str_idx;
1450      re_node_set *dest_nodes;
1451 {
1452   reg_errcode_t err;
1453   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1454   const re_node_set *candidates;
1455   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1456                 : &mctx->state_log[str_idx]->nodes);
1457
1458   /* At first, add the nodes which can epsilon transit to a node in
1459      DEST_NODE.  */
1460   err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1461   if (BE (err != REG_NOERROR, 0))
1462     return err;
1463
1464   /* Then, check the limitations in the current sift_context.  */
1465   if (sctx->limits.nelem)
1466     {
1467       err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1468                                  mctx->bkref_ents, str_idx);
1469       if (BE (err != REG_NOERROR, 0))
1470         return err;
1471     }
1472
1473   /* Update state_log.  */
1474   sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1475   if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1476     return err;
1477
1478   /* If we are searching for the subexpression candidates.
1479      Note that we were from transit_state_bkref_loop() in this case.  */
1480   if (sctx->check_subexp)
1481     {
1482       err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes);
1483       if (BE (err != REG_NOERROR, 0))
1484         return err;
1485     }
1486
1487   if ((mctx->state_log[str_idx] != NULL
1488        && mctx->state_log[str_idx]->has_backref))
1489     {
1490       err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
1491       if (BE (err != REG_NOERROR, 0))
1492         return err;
1493     }
1494   return REG_NOERROR;
1495 }
1496
1497 static reg_errcode_t
1498 add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1499      re_dfa_t *dfa;
1500      re_node_set *dest_nodes;
1501      const re_node_set *candidates;
1502 {
1503   reg_errcode_t err;
1504   int src_idx;
1505   re_node_set src_copy;
1506
1507   err = re_node_set_init_copy (&src_copy, dest_nodes);
1508   if (BE (err != REG_NOERROR, 0))
1509     return err;
1510   for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1511     {
1512       err = re_node_set_add_intersect (dest_nodes, candidates,
1513                                        dfa->inveclosures
1514                                        + src_copy.elems[src_idx]);
1515       if (BE (err != REG_NOERROR, 0))
1516         return err;
1517     }
1518   re_node_set_free (&src_copy);
1519   return REG_NOERROR;
1520 }
1521
1522 static reg_errcode_t
1523 sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1524      re_dfa_t *dfa;
1525      int node;
1526      re_node_set *dest_nodes;
1527      const re_node_set *candidates;
1528 {
1529     int ecl_idx;
1530     reg_errcode_t err;
1531     re_node_set *inv_eclosure = dfa->inveclosures + node;
1532     re_node_set except_nodes;
1533     re_node_set_init_empty (&except_nodes);
1534     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1535       {
1536         int cur_node = inv_eclosure->elems[ecl_idx];
1537         if (cur_node == node)
1538           continue;
1539         if (dfa->edests[cur_node].nelem)
1540           {
1541             int edst1 = dfa->edests[cur_node].elems[0];
1542             int edst2 = ((dfa->edests[cur_node].nelem > 1)
1543                          ? dfa->edests[cur_node].elems[1] : -1);
1544             if ((!re_node_set_contains (inv_eclosure, edst1)
1545                  && re_node_set_contains (dest_nodes, edst1))
1546                 || (edst2 > 0
1547                     && !re_node_set_contains (inv_eclosure, edst2)
1548                     && re_node_set_contains (dest_nodes, edst2)))
1549               {
1550                 err = re_node_set_add_intersect (&except_nodes, candidates,
1551                                                  dfa->inveclosures + cur_node);
1552                 if (BE (err != REG_NOERROR, 0))
1553                   return err;
1554               }
1555           }
1556       }
1557     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1558       {
1559         int cur_node = inv_eclosure->elems[ecl_idx];
1560         if (!re_node_set_contains (&except_nodes, cur_node))
1561           {
1562             int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1563             re_node_set_remove_at (dest_nodes, idx);
1564           }
1565       }
1566     re_node_set_free (&except_nodes);
1567     return REG_NOERROR;
1568 }
1569
1570 static int
1571 check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
1572      re_dfa_t *dfa;
1573      re_node_set *limits;
1574      re_match_context_t *mctx;
1575      int dst_node, dst_idx, src_node, src_idx;
1576 {
1577   int lim_idx, src_pos, dst_pos;
1578
1579   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1580     {
1581       int bkref, subexp_idx/*, node_idx, cls_node*/;
1582       struct re_backref_cache_entry *ent;
1583       ent = mctx->bkref_ents + limits->elems[lim_idx];
1584       bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
1585                ? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
1586       subexp_idx = dfa->nodes[bkref].opr.idx - 1;
1587
1588       dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1589                                            dfa->eclosures + dst_node,
1590                                            subexp_idx, dst_node, dst_idx);
1591       src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1592                                            dfa->eclosures + src_node,
1593                                            subexp_idx, src_node, src_idx);
1594
1595       /* In case of:
1596          <src> <dst> ( <subexp> )
1597          ( <subexp> ) <src> <dst>
1598          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1599       if (src_pos == dst_pos)
1600         continue; /* This is unrelated limitation.  */
1601       else
1602         return 1;
1603     }
1604   return 0;
1605 }
1606
1607 static int
1608 check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
1609                            str_idx)
1610      re_dfa_t *dfa;
1611      re_match_context_t *mctx;
1612      re_node_set *eclosures;
1613      int limit, subexp_idx, node, str_idx;
1614 {
1615   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1616   int pos = (str_idx < lim->subexp_from ? -1
1617              : (lim->subexp_to < str_idx ? 1 : 0));
1618   if (pos == 0
1619       && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
1620     {
1621       int node_idx;
1622       for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1623         {
1624           int node = eclosures->elems[node_idx];
1625           int entity = node;
1626           re_token_type_t type= dfa->nodes[node].type;
1627           if (type == OP_CONTEXT_NODE)
1628             {
1629               entity = dfa->nodes[node].opr.ctx_info->entity;
1630               type = dfa->nodes[entity].type;
1631             }
1632           if (type == OP_BACK_REF)
1633             {
1634               int bi;
1635               for (bi = 0; bi < mctx->nbkref_ents; ++bi)
1636                 {
1637                   struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1638                   if (ent->node == node && ent->subexp_from == ent->subexp_to
1639                       && ent->str_idx == str_idx)
1640                     {
1641                       int cpos, dst;
1642                       dst = dfa->nexts[node];
1643                       cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
1644                                                         dfa->eclosures + dst,
1645                                                         subexp_idx, dst,
1646                                                         str_idx);
1647                       if ((str_idx == lim->subexp_from && cpos == -1)
1648                           || (str_idx == lim->subexp_to && cpos == 0))
1649                         return cpos;
1650                     }
1651                 }
1652             }
1653           if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1654               && str_idx == lim->subexp_from)
1655             {
1656               pos = -1;
1657               break;
1658             }
1659           if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1660               && str_idx == lim->subexp_to)
1661             break;
1662         }
1663       if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
1664         pos = 1;
1665     }
1666   return pos;
1667 }
1668
1669 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1670    which are against limitations from DEST_NODES. */
1671
1672 static reg_errcode_t
1673 check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1674      re_dfa_t *dfa;
1675      re_node_set *dest_nodes;
1676      const re_node_set *candidates;
1677      re_node_set *limits;
1678      struct re_backref_cache_entry *bkref_ents;
1679      int str_idx;
1680 {
1681   reg_errcode_t err;
1682   int node_idx, lim_idx;
1683
1684   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1685     {
1686       int bkref, subexp_idx;
1687       struct re_backref_cache_entry *ent;
1688       ent = bkref_ents + limits->elems[lim_idx];
1689
1690       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1691         continue; /* This is unrelated limitation.  */
1692
1693       bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
1694                ? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
1695       subexp_idx = dfa->nodes[bkref].opr.idx - 1;
1696
1697       if (ent->subexp_to == str_idx)
1698         {
1699           int ops_node = -1;
1700           int cls_node = -1;
1701           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1702             {
1703               int node = dest_nodes->elems[node_idx];
1704               re_token_type_t type= dfa->nodes[node].type;
1705               if (type == OP_OPEN_SUBEXP
1706                   && subexp_idx == dfa->nodes[node].opr.idx)
1707                 ops_node = node;
1708               else if (type == OP_CLOSE_SUBEXP
1709                        && subexp_idx == dfa->nodes[node].opr.idx)
1710                 cls_node = node;
1711             }
1712
1713           /* Check the limitation of the open subexpression.  */
1714           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
1715           if (ops_node >= 0)
1716             {
1717               err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
1718                                           candidates);
1719               if (BE (err != REG_NOERROR, 0))
1720                 return err;
1721             }
1722           /* Check the limitation of the close subexpression.  */
1723           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1724             {
1725               int node = dest_nodes->elems[node_idx];
1726               if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
1727                   && !re_node_set_contains (dfa->eclosures + node, cls_node))
1728                 {
1729                   /* It is against this limitation.
1730                      Remove it form the current sifted state.  */
1731                   err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1732                                               candidates);
1733                   if (BE (err != REG_NOERROR, 0))
1734                     return err;
1735                   --node_idx;
1736                 }
1737             }
1738         }
1739       else /* (ent->subexp_to != str_idx)  */
1740         {
1741           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1742             {
1743               int node = dest_nodes->elems[node_idx];
1744               re_token_type_t type= dfa->nodes[node].type;
1745               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1746                 {
1747                   if (subexp_idx != dfa->nodes[node].opr.idx)
1748                     continue;
1749                   if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1750                       || (type == OP_OPEN_SUBEXP))
1751                     {
1752                       /* It is against this limitation.
1753                          Remove it form the current sifted state.  */
1754                       err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1755                                                   candidates);
1756                       if (BE (err != REG_NOERROR, 0))
1757                         return err;
1758                     }
1759                 }
1760             }
1761         }
1762     }
1763   return REG_NOERROR;
1764 }
1765
1766 /* Search for the top (in case of sctx->check_subexp < 0) or the
1767    bottom (in case of sctx->check_subexp > 0) of the subexpressions
1768    which the backreference sctx->cur_bkref can match.  */
1769
1770 static reg_errcode_t
1771 search_subexp (preg, mctx, sctx, str_idx, dest_nodes)
1772      const regex_t *preg;
1773      re_match_context_t *mctx;
1774      re_sift_context_t *sctx;
1775      int str_idx;
1776      re_node_set *dest_nodes;
1777 {
1778   reg_errcode_t err;
1779   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1780   re_sift_context_t local_sctx;
1781   int node_idx, node;
1782   const re_node_set *candidates;
1783   re_dfastate_t **lim_states = NULL;
1784   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1785                 : &mctx->state_log[str_idx]->nodes);
1786   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
1787
1788   for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1789     {
1790       re_token_type_t type;
1791       int entity;
1792       node = dest_nodes->elems[node_idx];
1793       type = dfa->nodes[node].type;
1794       entity = (type != OP_CONTEXT_NODE ? node
1795                 : dfa->nodes[node].opr.ctx_info->entity);
1796       type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
1797
1798       if (type == OP_CLOSE_SUBEXP
1799           && sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
1800         {
1801           re_dfastate_t *cur_state;
1802           /* Found the bottom of the subexpression, then search for the
1803              top of it.  */
1804           if (local_sctx.sifted_states == NULL)
1805             {
1806               /* It hasn't been initialized yet, initialize it now.  */
1807               local_sctx = *sctx;
1808               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
1809               if (BE (err != REG_NOERROR, 0))
1810                 return err;
1811             }
1812           local_sctx.check_subexp = -sctx->check_subexp;
1813           local_sctx.limited_states = sctx->limited_states;
1814           local_sctx.last_node = node;
1815           local_sctx.last_str_idx = local_sctx.cls_subexp_idx = str_idx;
1816           cur_state = local_sctx.sifted_states[str_idx];
1817           err = sift_states_backward (preg, mctx, &local_sctx);
1818           local_sctx.sifted_states[str_idx] = cur_state;
1819           if (BE (err != REG_NOERROR, 0))
1820             return err;
1821           /* There must not 2 same node in a node set.  */
1822           break;
1823         }
1824       else if (type == OP_OPEN_SUBEXP
1825                && -sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
1826         {
1827           /* Found the top of the subexpression, check that the
1828              backreference can match the input string.  */
1829           char *buf;
1830           int dest_str_idx;
1831           int bkref_str_idx = re_string_cur_idx (mctx->input);
1832           int subexp_len = sctx->cls_subexp_idx - str_idx;
1833           if (subexp_len < 0 || bkref_str_idx + subexp_len > mctx->input->len)
1834             break;
1835
1836           if (bkref_str_idx + subexp_len > mctx->input->valid_len
1837               && mctx->input->valid_len < mctx->input->len)
1838             {
1839               reg_errcode_t err;
1840               err = extend_buffers (mctx);
1841               if (BE (err != REG_NOERROR, 0))
1842                 return err;
1843             }
1844           buf = (char *) re_string_get_buffer (mctx->input);
1845           if (strncmp (buf + str_idx, buf + bkref_str_idx, subexp_len) != 0)
1846             break;
1847
1848           if (sctx->limits.nelem && str_idx > 0)
1849             {
1850               re_dfastate_t *cur_state = sctx->sifted_states[str_idx];
1851               if (lim_states == NULL)
1852                 {
1853                   lim_states = re_malloc (re_dfastate_t *, str_idx + 1);
1854                 }
1855               if (local_sctx.sifted_states == NULL)
1856                 {
1857                   /* It hasn't been initialized yet, initialize it now.  */
1858                   local_sctx = *sctx;
1859                   if (BE (lim_states == NULL, 0))
1860                     return REG_ESPACE;
1861                   err = re_node_set_init_copy (&local_sctx.limits,
1862                                                &sctx->limits);
1863                   if (BE (err != REG_NOERROR, 0))
1864                     return err;
1865                 }
1866               local_sctx.check_subexp = 0;
1867               local_sctx.last_node = node;
1868               local_sctx.last_str_idx = str_idx;
1869               local_sctx.limited_states = lim_states;
1870               memset (lim_states, '\0',
1871                       sizeof (re_dfastate_t*) * (str_idx + 1));
1872               err = sift_states_backward (preg, mctx, &local_sctx);
1873               if (BE (err != REG_NOERROR, 0))
1874                 return err;
1875               if (local_sctx.sifted_states[0] == NULL
1876                   && local_sctx.limited_states[0] == NULL)
1877                 {
1878                   sctx->sifted_states[str_idx] = cur_state;
1879                   break;
1880                 }
1881               sctx->sifted_states[str_idx] = cur_state;
1882             }
1883           /* Successfully matched, add a new cache entry.  */
1884           dest_str_idx = bkref_str_idx + subexp_len;
1885           err = match_ctx_add_entry (mctx, sctx->cur_bkref, bkref_str_idx,
1886                                      str_idx, sctx->cls_subexp_idx);
1887           if (BE (err != REG_NOERROR, 0))
1888             return err;
1889           err = clean_state_log_if_need (mctx, dest_str_idx);
1890           if (BE (err != REG_NOERROR, 0))
1891             return err;
1892           break;
1893         }
1894     }
1895
1896   /* Remove the top/bottom of the sub expression we processed.  */
1897   if (node_idx < dest_nodes->nelem)
1898     {
1899       err = sub_epsilon_src_nodes(dfa, node, dest_nodes, candidates);
1900       if (BE (err != REG_NOERROR, 0))
1901         return err;
1902       /* Update state_log.  */
1903       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1904       if (BE (err != REG_NOERROR, 0))
1905         return err;
1906     }
1907
1908   if (local_sctx.sifted_states != NULL)
1909     re_node_set_free (&local_sctx.limits);
1910   if (lim_states != NULL)
1911     re_free (lim_states);
1912   return REG_NOERROR;
1913 }
1914
1915 static reg_errcode_t
1916 sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
1917      const regex_t *preg;
1918      re_match_context_t *mctx;
1919      re_sift_context_t *sctx;
1920      int str_idx;
1921      re_node_set *dest_nodes;
1922 {
1923   reg_errcode_t err;
1924   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1925   int node_idx, node;
1926   re_sift_context_t local_sctx;
1927   const re_node_set *candidates;
1928   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1929                 : &mctx->state_log[str_idx]->nodes);
1930   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
1931
1932   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1933     {
1934       int entity;
1935       int cur_bkref_idx = re_string_cur_idx (mctx->input);
1936       re_token_type_t type;
1937       node = candidates->elems[node_idx];
1938       type = dfa->nodes[node].type;
1939       entity = (type != OP_CONTEXT_NODE ? node
1940                 : dfa->nodes[node].opr.ctx_info->entity);
1941       type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
1942       if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1943         continue;
1944       /* Avoid infinite loop for the REs like "()\1+".  */
1945       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
1946         continue;
1947       if (type == OP_BACK_REF)
1948         {
1949           int enabled_idx;
1950           for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1951             {
1952               int disabled_idx, subexp_len, to_idx;
1953               struct re_backref_cache_entry *entry;
1954               entry = mctx->bkref_ents + enabled_idx;
1955               subexp_len = entry->subexp_to - entry->subexp_from;
1956               to_idx = str_idx + subexp_len;
1957
1958               if (entry->node != node || entry->str_idx != str_idx
1959                   || to_idx > sctx->last_str_idx
1960                   || sctx->sifted_states[to_idx] == NULL)
1961                 continue;
1962               if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
1963                                         dfa->nexts[node]))
1964                 {
1965                   int dst_idx;
1966                   re_node_set *dsts = &sctx->sifted_states[to_idx]->nodes;
1967                   for (dst_idx = 0; dst_idx < dsts->nelem; ++dst_idx)
1968                     {
1969                       int dst_node = dsts->elems[dst_idx];
1970                       if (dfa->nodes[dst_node].type == OP_CONTEXT_NODE
1971                           && (dfa->nodes[dst_node].opr.ctx_info->entity
1972                               == dfa->nexts[node]))
1973                         break;
1974                     }
1975                   if (dst_idx == dsts->nelem)
1976                     continue;
1977                 }
1978
1979               if (check_dst_limits (dfa, &sctx->limits, mctx, node,
1980                                     str_idx, dfa->nexts[node], to_idx))
1981                 continue;
1982               if (sctx->check_subexp == dfa->nodes[entity].opr.idx)
1983                 {
1984                   char *buf;
1985                   buf = (char *) re_string_get_buffer (mctx->input);
1986                   if (strncmp (buf + entry->subexp_from,
1987                                buf + cur_bkref_idx, subexp_len) != 0)
1988                     continue;
1989                   err = match_ctx_add_entry (mctx, sctx->cur_bkref,
1990                                              cur_bkref_idx, entry->subexp_from,
1991                                              entry->subexp_to);
1992                   if (BE (err != REG_NOERROR, 0))
1993                     return err;
1994                   err = clean_state_log_if_need (mctx, cur_bkref_idx
1995                                                  + subexp_len);
1996                   if (BE (err != REG_NOERROR, 0))
1997                     return err;
1998                 }
1999               else
2000                 {
2001                   re_dfastate_t *cur_state;
2002                   entry->flag = 0;
2003                   for (disabled_idx = enabled_idx + 1;
2004                        disabled_idx < mctx->nbkref_ents; ++disabled_idx)
2005                     {
2006                       struct re_backref_cache_entry *entry2;
2007                       entry2 = mctx->bkref_ents + disabled_idx;
2008                       if (entry2->node != node || entry2->str_idx != str_idx)
2009                         continue;
2010                       entry2->flag = 1;
2011                     }
2012
2013                   if (local_sctx.sifted_states == NULL)
2014                     {
2015                       local_sctx = *sctx;
2016                       err = re_node_set_init_copy (&local_sctx.limits,
2017                                                    &sctx->limits);
2018                       if (BE (err != REG_NOERROR, 0))
2019                         return err;
2020                     }
2021                   local_sctx.last_node = node;
2022                   local_sctx.last_str_idx = str_idx;
2023                   err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2024                   if (BE (err < 0, 0))
2025                     return REG_ESPACE;
2026                   cur_state = local_sctx.sifted_states[str_idx];
2027                   err = sift_states_backward (preg, mctx, &local_sctx);
2028                   if (BE (err != REG_NOERROR, 0))
2029                     return err;
2030                   if (sctx->limited_states != NULL)
2031                     {
2032                       err = merge_state_array (dfa, sctx->limited_states,
2033                                                local_sctx.sifted_states,
2034                                                str_idx + 1);
2035                       if (BE (err != REG_NOERROR, 0))
2036                         return err;
2037                     }
2038                   local_sctx.sifted_states[str_idx] = cur_state;
2039                   re_node_set_remove_at (&local_sctx.limits,
2040                                          local_sctx.limits.nelem - 1);
2041                   entry->flag = 1;
2042                 }
2043             }
2044           for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2045             {
2046               struct re_backref_cache_entry *entry;
2047               entry = mctx->bkref_ents + enabled_idx;
2048               if (entry->node == node && entry->str_idx == str_idx)
2049                 entry->flag = 0;
2050             }
2051         }
2052     }
2053   if (local_sctx.sifted_states != NULL)
2054     {
2055       re_node_set_free (&local_sctx.limits);
2056     }
2057
2058   return REG_NOERROR;
2059 }
2060
2061
2062 #ifdef RE_ENABLE_I18N
2063 static int
2064 sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
2065     const regex_t *preg;
2066     const re_match_context_t *mctx;
2067     re_sift_context_t *sctx;
2068     int node_idx, str_idx, max_str_idx;
2069 {
2070   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2071   int naccepted;
2072   /* Check the node can accept `multi byte'.  */
2073   naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
2074   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2075       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2076                             dfa->nexts[node_idx]))
2077     /* The node can't accept the `multi byte', or the
2078        destination was already throwed away, then the node
2079        could't accept the current input `multi byte'.   */
2080     naccepted = 0;
2081   /* Otherwise, it is sure that the node could accept
2082      `naccepted' bytes input.  */
2083   return naccepted;
2084 }
2085 #endif /* RE_ENABLE_I18N */
2086
2087 \f
2088 /* Functions for state transition.  */
2089
2090 /* Return the next state to which the current state STATE will transit by
2091    accepting the current input byte, and update STATE_LOG if necessary.
2092    If STATE can accept a multibyte char/collating element/back reference
2093    update the destination of STATE_LOG.  */
2094
2095 static re_dfastate_t *
2096 transit_state (err, preg, mctx, state, fl_search)
2097      reg_errcode_t *err;
2098      const regex_t *preg;
2099      re_match_context_t *mctx;
2100      re_dfastate_t *state;
2101      int fl_search;
2102 {
2103   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2104   re_dfastate_t **trtable, *next_state;
2105   unsigned char ch;
2106
2107   if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
2108       || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
2109           && mctx->input->valid_len < mctx->input->len))
2110     {
2111       *err = extend_buffers (mctx);
2112       if (BE (*err != REG_NOERROR, 0))
2113         return NULL;
2114     }
2115
2116   *err = REG_NOERROR;
2117   if (state == NULL)
2118     {
2119       next_state = state;
2120       re_string_skip_bytes (mctx->input, 1);
2121     }
2122   else
2123     {
2124 #ifdef RE_ENABLE_I18N
2125       /* If the current state can accept multibyte.  */
2126       if (state->accept_mb)
2127         {
2128           *err = transit_state_mb (preg, state, mctx);
2129           if (BE (*err != REG_NOERROR, 0))
2130             return NULL;
2131         }
2132 #endif /* RE_ENABLE_I18N */
2133
2134       /* Then decide the next state with the single byte.  */
2135       if (1)
2136         {
2137           /* Use transition table  */
2138           ch = re_string_fetch_byte (mctx->input);
2139           trtable = fl_search ? state->trtable_search : state->trtable;
2140           if (trtable == NULL)
2141             {
2142               trtable = build_trtable (preg, state, fl_search);
2143               if (fl_search)
2144                 state->trtable_search = trtable;
2145               else
2146                 state->trtable = trtable;
2147             }
2148           next_state = trtable[ch];
2149         }
2150       else
2151         {
2152           /* don't use transition table  */
2153           next_state = transit_state_sb (err, preg, state, fl_search, mctx);
2154           if (BE (next_state == NULL && err != REG_NOERROR, 0))
2155             return NULL;
2156         }
2157     }
2158
2159   /* Update the state_log if we need.  */
2160   if (mctx->state_log != NULL)
2161     {
2162       int cur_idx = re_string_cur_idx (mctx->input);
2163       if (cur_idx > mctx->state_log_top)
2164         {
2165           mctx->state_log[cur_idx] = next_state;
2166           mctx->state_log_top = cur_idx;
2167         }
2168       else if (mctx->state_log[cur_idx] == 0)
2169         {
2170           mctx->state_log[cur_idx] = next_state;
2171         }
2172       else
2173         {
2174           re_dfastate_t *pstate;
2175           unsigned int context;
2176           re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2177           /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2178              the destination of a multibyte char/collating element/
2179              back reference.  Then the next state is the union set of
2180              these destinations and the results of the transition table.  */
2181           pstate = mctx->state_log[cur_idx];
2182           log_nodes = pstate->entrance_nodes;
2183           if (next_state != NULL)
2184             {
2185               table_nodes = next_state->entrance_nodes;
2186               *err = re_node_set_init_union (&next_nodes, table_nodes,
2187                                              log_nodes);
2188               if (BE (*err != REG_NOERROR, 0))
2189                 return NULL;
2190             }
2191           else
2192             next_nodes = *log_nodes;
2193           /* Note: We already add the nodes of the initial state,
2194                    then we don't need to add them here.  */
2195
2196           context = re_string_context_at (mctx->input,
2197                                           re_string_cur_idx (mctx->input) - 1,
2198                                           mctx->eflags, preg->newline_anchor);
2199           next_state = mctx->state_log[cur_idx]
2200             = re_acquire_state_context (err, dfa, &next_nodes, context);
2201           /* We don't need to check errors here, since the return value of
2202              this function is next_state and ERR is already set.  */
2203
2204           if (table_nodes != NULL)
2205             re_node_set_free (&next_nodes);
2206         }
2207       /* If the next state has back references.  */
2208       if (next_state != NULL && next_state->has_backref)
2209         {
2210           *err = transit_state_bkref (preg, next_state, mctx);
2211           if (BE (*err != REG_NOERROR, 0))
2212             return NULL;
2213           next_state = mctx->state_log[cur_idx];
2214         }
2215     }
2216   return next_state;
2217 }
2218
2219 /* Helper functions for transit_state.  */
2220
2221 /* Return the next state to which the current state STATE will transit by
2222    accepting the current input byte.  */
2223
2224 static re_dfastate_t *
2225 transit_state_sb (err, preg, state, fl_search, mctx)
2226      reg_errcode_t *err;
2227      const regex_t *preg;
2228      re_dfastate_t *state;
2229      int fl_search;
2230      re_match_context_t *mctx;
2231 {
2232   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2233   re_node_set next_nodes;
2234   re_dfastate_t *next_state;
2235   int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
2236   unsigned int context;
2237
2238   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2239   if (BE (*err != REG_NOERROR, 0))
2240     return NULL;
2241   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2242     {
2243       int cur_node = state->nodes.elems[node_cnt];
2244       if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
2245         {
2246           *err = re_node_set_merge (&next_nodes,
2247                                     dfa->eclosures + dfa->nexts[cur_node]);
2248           if (BE (*err != REG_NOERROR, 0))
2249             return NULL;
2250         }
2251     }
2252   if (fl_search)
2253     {
2254 #ifdef RE_ENABLE_I18N
2255       int not_initial = 0;
2256       if (MB_CUR_MAX > 1)
2257         for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
2258           if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
2259             {
2260               not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
2261               break;
2262             }
2263       if (!not_initial)
2264 #endif
2265         {
2266           *err = re_node_set_merge (&next_nodes,
2267                                     dfa->init_state->entrance_nodes);
2268           if (BE (*err != REG_NOERROR, 0))
2269             return NULL;
2270         }
2271     }
2272   context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
2273                                   preg->newline_anchor);
2274   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2275   /* We don't need to check errors here, since the return value of
2276      this function is next_state and ERR is already set.  */
2277
2278   re_node_set_free (&next_nodes);
2279   re_string_skip_bytes (mctx->input, 1);
2280   return next_state;
2281 }
2282
2283 #ifdef RE_ENABLE_I18N
2284 static reg_errcode_t
2285 transit_state_mb (preg, pstate, mctx)
2286     const regex_t *preg;
2287     re_dfastate_t *pstate;
2288     re_match_context_t *mctx;
2289 {
2290   reg_errcode_t err;
2291   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2292   int i;
2293
2294   for (i = 0; i < pstate->nodes.nelem; ++i)
2295     {
2296       re_node_set dest_nodes, *new_nodes;
2297       int cur_node_idx = pstate->nodes.elems[i];
2298       int naccepted = 0, dest_idx;
2299       unsigned int context;
2300       re_dfastate_t *dest_state;
2301
2302       if (dfa->nodes[cur_node_idx].type == OP_CONTEXT_NODE)
2303         {
2304           context = re_string_context_at (mctx->input,
2305                                           re_string_cur_idx (mctx->input),
2306                                           mctx->eflags, preg->newline_anchor);
2307           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2308                                         context))
2309             continue;
2310           cur_node_idx = dfa->nodes[cur_node_idx].opr.ctx_info->entity;
2311         }
2312
2313       /* How many bytes the node can accepts?  */
2314       if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2315         naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
2316                                              re_string_cur_idx (mctx->input));
2317       if (naccepted == 0)
2318         continue;
2319
2320       /* The node can accepts `naccepted' bytes.  */
2321       dest_idx = re_string_cur_idx (mctx->input) + naccepted;
2322       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2323                                : mctx->max_mb_elem_len);
2324       err = clean_state_log_if_need (mctx, dest_idx);
2325       if (BE (err != REG_NOERROR, 0))
2326         return err;
2327 #ifdef DEBUG
2328       assert (dfa->nexts[cur_node_idx] != -1);
2329 #endif
2330       /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2331          then we use pstate->nodes.elems[i] instead.  */
2332       new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2333
2334       dest_state = mctx->state_log[dest_idx];
2335       if (dest_state == NULL)
2336         dest_nodes = *new_nodes;
2337       else
2338         {
2339           err = re_node_set_init_union (&dest_nodes,
2340                                         dest_state->entrance_nodes, new_nodes);
2341           if (BE (err != REG_NOERROR, 0))
2342             return err;
2343         }
2344       context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
2345                                       preg->newline_anchor);
2346       mctx->state_log[dest_idx]
2347         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2348       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2349         return err;
2350       if (dest_state != NULL)
2351         re_node_set_free (&dest_nodes);
2352     }
2353   return REG_NOERROR;
2354 }
2355 #endif /* RE_ENABLE_I18N */
2356
2357 static reg_errcode_t
2358 transit_state_bkref (preg, pstate, mctx)
2359     const regex_t *preg;
2360     re_dfastate_t *pstate;
2361     re_match_context_t *mctx;
2362 {
2363   reg_errcode_t err;
2364   re_dfastate_t **work_state_log;
2365
2366   work_state_log = re_malloc (re_dfastate_t *,
2367                               re_string_cur_idx (mctx->input) + 1);
2368   if (BE (work_state_log == NULL, 0))
2369     return REG_ESPACE;
2370
2371   err = transit_state_bkref_loop (preg, &pstate->nodes, work_state_log, mctx);
2372   re_free (work_state_log);
2373   return err;
2374 }
2375
2376 /* Caller must allocate `work_state_log'.  */
2377
2378 static reg_errcode_t
2379 transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
2380     const regex_t *preg;
2381     re_node_set *nodes;
2382     re_dfastate_t **work_state_log;
2383     re_match_context_t *mctx;
2384 {
2385   reg_errcode_t err;
2386   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2387   int i;
2388   regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
2389   int cur_str_idx = re_string_cur_idx (mctx->input);
2390   if (BE (cur_regs == NULL, 0))
2391     return REG_ESPACE;
2392
2393   for (i = 0; i < nodes->nelem; ++i)
2394     {
2395       int dest_str_idx, subexp_idx, prev_nelem, bkc_idx;
2396       int node_idx = nodes->elems[i];
2397       unsigned int context;
2398       re_token_t *node = dfa->nodes + node_idx;
2399       re_node_set *new_dest_nodes;
2400       re_sift_context_t sctx;
2401
2402       /* Check whether `node' is a backreference or not.  */
2403       if (node->type == OP_BACK_REF)
2404         subexp_idx = node->opr.idx;
2405       else if (node->type == OP_CONTEXT_NODE &&
2406                dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
2407         {
2408           context = re_string_context_at (mctx->input, cur_str_idx,
2409                                           mctx->eflags, preg->newline_anchor);
2410           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2411             continue;
2412           subexp_idx = dfa->nodes[node->opr.ctx_info->entity].opr.idx;
2413         }
2414       else
2415         continue;
2416
2417       /* `node' is a backreference.
2418          Check the substring which the substring matched.  */
2419       sift_ctx_init (&sctx, work_state_log, NULL, node_idx, cur_str_idx,
2420                      subexp_idx);
2421       sctx.cur_bkref = node_idx;
2422       match_ctx_clear_flag (mctx);
2423       err = sift_states_backward (preg, mctx, &sctx);
2424       if (BE (err != REG_NOERROR, 0))
2425         return err;
2426
2427       /* And add the epsilon closures (which is `new_dest_nodes') of
2428          the backreference to appropriate state_log.  */
2429 #ifdef DEBUG
2430       assert (dfa->nexts[node_idx] != -1);
2431 #endif
2432       for (bkc_idx = 0; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2433         {
2434           int subexp_len;
2435           re_dfastate_t *dest_state;
2436           struct re_backref_cache_entry *bkref_ent;
2437           bkref_ent = mctx->bkref_ents + bkc_idx;
2438           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2439             continue;
2440           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2441           new_dest_nodes = ((node->type == OP_CONTEXT_NODE && subexp_len == 0)
2442                             ? dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure
2443                             : dfa->eclosures + dfa->nexts[node_idx]);
2444           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2445                           - bkref_ent->subexp_from);
2446           context = (IS_WORD_CHAR (re_string_byte_at (mctx->input,
2447                                                       dest_str_idx - 1))
2448                      ? CONTEXT_WORD : 0);
2449           dest_state = mctx->state_log[dest_str_idx];
2450           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2451                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2452           /* Add `new_dest_node' to state_log.  */
2453           if (dest_state == NULL)
2454             {
2455               mctx->state_log[dest_str_idx]
2456                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2457                                             context);
2458               if (BE (mctx->state_log[dest_str_idx] == NULL
2459                       && err != REG_NOERROR, 0))
2460                 return err;
2461             }
2462           else
2463             {
2464               re_node_set dest_nodes;
2465               err = re_node_set_init_union (&dest_nodes,
2466                                             dest_state->entrance_nodes,
2467                                             new_dest_nodes);
2468               if (BE (err != REG_NOERROR, 0))
2469                 return err;
2470               mctx->state_log[dest_str_idx]
2471                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2472               if (BE (mctx->state_log[dest_str_idx] == NULL
2473                       && err != REG_NOERROR, 0))
2474                 return err;
2475               re_node_set_free (&dest_nodes);
2476             }
2477           /* We need to check recursively if the backreference can epsilon
2478              transit.  */
2479           if (subexp_len == 0
2480               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2481             {
2482               err = transit_state_bkref_loop (preg, new_dest_nodes,
2483                                               work_state_log, mctx);
2484               if (BE (err != REG_NOERROR, 0))
2485                 return err;
2486             }
2487         }
2488     }
2489   re_free (cur_regs);
2490   return REG_NOERROR;
2491 }
2492
2493 /* Build transition table for the state.
2494    Return the new table if succeeded, otherwise return NULL.  */
2495
2496 static re_dfastate_t **
2497 build_trtable (preg, state, fl_search)
2498     const regex_t *preg;
2499     const re_dfastate_t *state;
2500     int fl_search;
2501 {
2502   reg_errcode_t err;
2503   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2504   int i, j, k, ch;
2505   int ndests; /* Number of the destination states from `state'.  */
2506   re_dfastate_t **trtable, **dest_states, **dest_states_word, **dest_states_nl;
2507   re_node_set follows, *dests_node;
2508   bitset *dests_ch;
2509   bitset acceptable;
2510
2511   /* We build DFA states which corresponds to the destination nodes
2512      from `state'.  `dests_node[i]' represents the nodes which i-th
2513      destination state contains, and `dests_ch[i]' represents the
2514      characters which i-th destination state accepts.  */
2515   dests_node = re_malloc (re_node_set, SBC_MAX);
2516   dests_ch = re_malloc (bitset, SBC_MAX);
2517
2518   /* Initialize transiton table.  */
2519   trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
2520   if (BE (dests_node == NULL || dests_ch == NULL || trtable == NULL, 0))
2521     return NULL;
2522
2523   /* At first, group all nodes belonging to `state' into several
2524      destinations.  */
2525   ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
2526   if (BE (ndests <= 0, 0))
2527     {
2528       re_free (dests_node);
2529       re_free (dests_ch);
2530       /* Return NULL in case of an error, trtable otherwise.  */
2531       return (ndests < 0) ? NULL : trtable;
2532     }
2533
2534   dest_states = re_malloc (re_dfastate_t *, ndests);
2535   dest_states_word = re_malloc (re_dfastate_t *, ndests);
2536   dest_states_nl = re_malloc (re_dfastate_t *, ndests);
2537   bitset_empty (acceptable);
2538
2539   err = re_node_set_alloc (&follows, ndests + 1);
2540   if (BE (dest_states == NULL || dest_states_word == NULL
2541           || dest_states_nl == NULL || err != REG_NOERROR, 0))
2542     return NULL;
2543
2544   /* Then build the states for all destinations.  */
2545   for (i = 0; i < ndests; ++i)
2546     {
2547       int next_node;
2548       re_node_set_empty (&follows);
2549       /* Merge the follows of this destination states.  */
2550       for (j = 0; j < dests_node[i].nelem; ++j)
2551         {
2552           next_node = dfa->nexts[dests_node[i].elems[j]];
2553           if (next_node != -1)
2554             {
2555               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
2556               if (BE (err != REG_NOERROR, 0))
2557                 return NULL;
2558             }
2559         }
2560       /* If search flag is set, merge the initial state.  */
2561       if (fl_search)
2562         {
2563 #ifdef RE_ENABLE_I18N
2564           int not_initial = 0;
2565           for (j = 0; j < follows.nelem; ++j)
2566             if (dfa->nodes[follows.elems[j]].type == CHARACTER)
2567               {
2568                 not_initial = dfa->nodes[follows.elems[j]].mb_partial;
2569                 break;
2570               }
2571           if (!not_initial)
2572 #endif
2573             {
2574               err = re_node_set_merge (&follows,
2575                                        dfa->init_state->entrance_nodes);
2576               if (BE (err != REG_NOERROR, 0))
2577                 return NULL;
2578             }
2579         }
2580       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
2581       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
2582         return NULL;
2583       /* If the new state has context constraint,
2584          build appropriate states for these contexts.  */
2585       if (dest_states[i]->has_constraint)
2586         {
2587           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
2588                                                           CONTEXT_WORD);
2589           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
2590             return NULL;
2591           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
2592                                                         CONTEXT_NEWLINE);
2593           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
2594             return NULL;
2595         }
2596       else
2597         {
2598           dest_states_word[i] = dest_states[i];
2599           dest_states_nl[i] = dest_states[i];
2600         }
2601       bitset_merge (acceptable, dests_ch[i]);
2602     }
2603
2604   /* Update the transition table.  */
2605   /* For all characters ch...:  */
2606   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
2607     for (j = 0; j < UINT_BITS; ++j, ++ch)
2608       if ((acceptable[i] >> j) & 1)
2609         {
2610           /* The current state accepts the character ch.  */
2611           if (IS_WORD_CHAR (ch))
2612             {
2613               for (k = 0; k < ndests; ++k)
2614                 if ((dests_ch[k][i] >> j) & 1)
2615                   {
2616                     /* k-th destination accepts the word character ch.  */
2617                     trtable[ch] = dest_states_word[k];
2618                     /* There must be only one destination which accepts
2619                        character ch.  See group_nodes_into_DFAstates.  */
2620                     break;
2621                   }
2622             }
2623           else /* not WORD_CHAR */
2624             {
2625               for (k = 0; k < ndests; ++k)
2626                 if ((dests_ch[k][i] >> j) & 1)
2627                   {
2628                     /* k-th destination accepts the non-word character ch.  */
2629                     trtable[ch] = dest_states[k];
2630                     /* There must be only one destination which accepts
2631                        character ch.  See group_nodes_into_DFAstates.  */
2632                     break;
2633                   }
2634             }
2635         }
2636   /* new line */
2637   if (bitset_contain (acceptable, NEWLINE_CHAR))
2638     {
2639       /* The current state accepts newline character.  */
2640       for (k = 0; k < ndests; ++k)
2641         if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
2642           {
2643             /* k-th destination accepts newline character.  */
2644             trtable[NEWLINE_CHAR] = dest_states_nl[k];
2645             /* There must be only one destination which accepts
2646                newline.  See group_nodes_into_DFAstates.  */
2647             break;
2648           }
2649     }
2650
2651   re_free (dest_states_nl);
2652   re_free (dest_states_word);
2653   re_free (dest_states);
2654
2655   re_node_set_free (&follows);
2656   for (i = 0; i < ndests; ++i)
2657     re_node_set_free (dests_node + i);
2658
2659   re_free (dests_ch);
2660   re_free (dests_node);
2661
2662   return trtable;
2663 }
2664
2665 /* Group all nodes belonging to STATE into several destinations.
2666    Then for all destinations, set the nodes belonging to the destination
2667    to DESTS_NODE[i] and set the characters accepted by the destination
2668    to DEST_CH[i].  This function return the number of destinations.  */
2669
2670 static int
2671 group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
2672     const regex_t *preg;
2673     const re_dfastate_t *state;
2674     re_node_set *dests_node;
2675     bitset *dests_ch;
2676 {
2677   reg_errcode_t err;
2678   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2679   int i, j, k;
2680   int ndests; /* Number of the destinations from `state'.  */
2681   bitset accepts; /* Characters a node can accept.  */
2682   const re_node_set *cur_nodes = &state->nodes;
2683   bitset_empty (accepts);
2684   ndests = 0;
2685
2686   /* For all the nodes belonging to `state',  */
2687   for (i = 0; i < cur_nodes->nelem; ++i)
2688     {
2689       unsigned int constraint = 0;
2690       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
2691       re_token_type_t type = node->type;
2692
2693       if (type == OP_CONTEXT_NODE)
2694         {
2695           constraint = node->constraint;
2696           node = dfa->nodes + node->opr.ctx_info->entity;
2697           type = node->type;
2698         }
2699
2700       /* Enumerate all single byte character this node can accept.  */
2701       if (type == CHARACTER)
2702         bitset_set (accepts, node->opr.c);
2703       else if (type == SIMPLE_BRACKET)
2704         {
2705           bitset_merge (accepts, node->opr.sbcset);
2706         }
2707       else if (type == OP_PERIOD)
2708         {
2709           bitset_set_all (accepts);
2710           if (!(preg->syntax & RE_DOT_NEWLINE))
2711             bitset_clear (accepts, '\n');
2712           if (preg->syntax & RE_DOT_NOT_NULL)
2713             bitset_clear (accepts, '\0');
2714         }
2715       else
2716         continue;
2717
2718       /* Check the `accepts' and sift the characters which are not
2719          match it the context.  */
2720       if (constraint)
2721         {
2722           if (constraint & NEXT_WORD_CONSTRAINT)
2723             for (j = 0; j < BITSET_UINTS; ++j)
2724               accepts[j] &= dfa->word_char[j];
2725           else if (constraint & NEXT_NOTWORD_CONSTRAINT)
2726             for (j = 0; j < BITSET_UINTS; ++j)
2727               accepts[j] &= ~dfa->word_char[j];
2728           else if (constraint & NEXT_NEWLINE_CONSTRAINT)
2729             {
2730               int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
2731               bitset_empty (accepts);
2732               if (accepts_newline)
2733                 bitset_set (accepts, NEWLINE_CHAR);
2734               else
2735                 continue;
2736             }
2737         }
2738
2739       /* Then divide `accepts' into DFA states, or create a new
2740          state.  */
2741       for (j = 0; j < ndests; ++j)
2742         {
2743           bitset intersec; /* Intersection sets, see below.  */
2744           bitset remains;
2745           /* Flags, see below.  */
2746           int has_intersec, not_subset, not_consumed;
2747
2748           /* Optimization, skip if this state doesn't accept the character.  */
2749           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
2750             continue;
2751
2752           /* Enumerate the intersection set of this state and `accepts'.  */
2753           has_intersec = 0;
2754           for (k = 0; k < BITSET_UINTS; ++k)
2755             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
2756           /* And skip if the intersection set is empty.  */
2757           if (!has_intersec)
2758             continue;
2759
2760           /* Then check if this state is a subset of `accepts'.  */
2761           not_subset = not_consumed = 0;
2762           for (k = 0; k < BITSET_UINTS; ++k)
2763             {
2764               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
2765               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
2766             }
2767
2768           /* If this state isn't a subset of `accepts', create a
2769              new group state, which has the `remains'. */
2770           if (not_subset)
2771             {
2772               bitset_copy (dests_ch[ndests], remains);
2773               bitset_copy (dests_ch[j], intersec);
2774               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
2775               if (BE (err != REG_NOERROR, 0))
2776                 return -1;
2777               ++ndests;
2778             }
2779
2780           /* Put the position in the current group. */
2781           err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
2782           if (BE (err < 0, 0))
2783             return -1;
2784
2785           /* If all characters are consumed, go to next node. */
2786           if (!not_consumed)
2787             break;
2788         }
2789       /* Some characters remain, create a new group. */
2790       if (j == ndests)
2791         {
2792           bitset_copy (dests_ch[ndests], accepts);
2793           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
2794           if (BE (err != REG_NOERROR, 0))
2795             return -1;
2796           ++ndests;
2797           bitset_empty (accepts);
2798         }
2799     }
2800   return ndests;
2801 }
2802
2803 #ifdef RE_ENABLE_I18N
2804 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
2805    Return the number of the bytes the node accepts.
2806    STR_IDX is the current index of the input string.
2807
2808    This function handles the nodes which can accept one character, or
2809    one collating element like '.', '[a-z]', opposite to the other nodes
2810    can only accept one byte.  */
2811
2812 static int
2813 check_node_accept_bytes (preg, node_idx, input, str_idx)
2814     const regex_t *preg;
2815     int node_idx, str_idx;
2816     const re_string_t *input;
2817 {
2818   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2819   const re_token_t *node = dfa->nodes + node_idx;
2820   int elem_len = re_string_elem_size_at (input, str_idx);
2821   int char_len = re_string_char_size_at (input, str_idx);
2822   int i;
2823 # ifdef _LIBC
2824   int j;
2825   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2826 # endif /* _LIBC */
2827   if (elem_len <= 1 && char_len <= 1)
2828     return 0;
2829   if (node->type == OP_PERIOD)
2830     {
2831       /* '.' accepts any one character except the following two cases.  */
2832       if ((!(preg->syntax & RE_DOT_NEWLINE) &&
2833            re_string_byte_at (input, str_idx) == '\n') ||
2834           ((preg->syntax & RE_DOT_NOT_NULL) &&
2835            re_string_byte_at (input, str_idx) == '\0'))
2836         return 0;
2837       return char_len;
2838     }
2839   else if (node->type == COMPLEX_BRACKET)
2840     {
2841       const re_charset_t *cset = node->opr.mbcset;
2842 # ifdef _LIBC
2843       const unsigned char *pin = re_string_get_buffer (input) + str_idx;
2844 # endif /* _LIBC */
2845       int match_len = 0;
2846       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
2847                     ? re_string_wchar_at (input, str_idx) : 0);
2848
2849       /* match with multibyte character?  */
2850       for (i = 0; i < cset->nmbchars; ++i)
2851         if (wc == cset->mbchars[i])
2852           {
2853             match_len = char_len;
2854             goto check_node_accept_bytes_match;
2855           }
2856       /* match with character_class?  */
2857       for (i = 0; i < cset->nchar_classes; ++i)
2858         {
2859           wctype_t wt = cset->char_classes[i];
2860           if (__iswctype (wc, wt))
2861             {
2862               match_len = char_len;
2863               goto check_node_accept_bytes_match;
2864             }
2865         }
2866
2867 # ifdef _LIBC
2868       if (nrules != 0)
2869         {
2870           unsigned int in_collseq = 0;
2871           const int32_t *table, *indirect;
2872           const unsigned char *weights, *extra;
2873           const char *collseqwc;
2874           int32_t idx;
2875           /* This #include defines a local function!  */
2876 #  include <locale/weight.h>
2877
2878           /* match with collating_symbol?  */
2879           if (cset->ncoll_syms)
2880             extra = (const unsigned char *)
2881               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
2882           for (i = 0; i < cset->ncoll_syms; ++i)
2883             {
2884               const unsigned char *coll_sym = extra + cset->coll_syms[i];
2885               /* Compare the length of input collating element and
2886                  the length of current collating element.  */
2887               if (*coll_sym != elem_len)
2888                 continue;
2889               /* Compare each bytes.  */
2890               for (j = 0; j < *coll_sym; j++)
2891                 if (pin[j] != coll_sym[1 + j])
2892                   break;
2893               if (j == *coll_sym)
2894                 {
2895                   /* Match if every bytes is equal.  */
2896                   match_len = j;
2897                   goto check_node_accept_bytes_match;
2898                 }
2899             }
2900
2901           if (cset->nranges)
2902             {
2903               if (elem_len <= char_len)
2904                 {
2905                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
2906                   in_collseq = collseq_table_lookup (collseqwc, wc);
2907                 }
2908               else
2909                 in_collseq = find_collation_sequence_value (pin, elem_len);
2910             }
2911           /* match with range expression?  */
2912           for (i = 0; i < cset->nranges; ++i)
2913             if (cset->range_starts[i] <= in_collseq
2914                 && in_collseq <= cset->range_ends[i])
2915               {
2916                 match_len = elem_len;
2917                 goto check_node_accept_bytes_match;
2918               }
2919
2920           /* match with equivalence_class?  */
2921           if (cset->nequiv_classes)
2922             {
2923               const unsigned char *cp = pin;
2924               table = (const int32_t *)
2925                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
2926               weights = (const unsigned char *)
2927                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
2928               extra = (const unsigned char *)
2929                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
2930               indirect = (const int32_t *)
2931                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
2932               idx = findidx (&cp);
2933               if (idx > 0)
2934                 for (i = 0; i < cset->nequiv_classes; ++i)
2935                   {
2936                     int32_t equiv_class_idx = cset->equiv_classes[i];
2937                     size_t weight_len = weights[idx];
2938                     if (weight_len == weights[equiv_class_idx])
2939                       {
2940                         int cnt = 0;
2941                         while (cnt <= weight_len
2942                                && (weights[equiv_class_idx + 1 + cnt]
2943                                    == weights[idx + 1 + cnt]))
2944                           ++cnt;
2945                         if (cnt > weight_len)
2946                           {
2947                             match_len = elem_len;
2948                             goto check_node_accept_bytes_match;
2949                           }
2950                       }
2951                   }
2952             }
2953         }
2954       else
2955 # endif /* _LIBC */
2956         {
2957           /* match with range expression?  */
2958 #if __GNUC__ >= 2
2959           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
2960 #else
2961           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2962           cmp_buf[2] = wc;
2963 #endif
2964           for (i = 0; i < cset->nranges; ++i)
2965             {
2966               cmp_buf[0] = cset->range_starts[i];
2967               cmp_buf[4] = cset->range_ends[i];
2968               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2969                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2970                 {
2971                   match_len = char_len;
2972                   goto check_node_accept_bytes_match;
2973                 }
2974             }
2975         }
2976     check_node_accept_bytes_match:
2977       if (!cset->non_match)
2978         return match_len;
2979       else
2980         {
2981           if (match_len > 0)
2982             return 0;
2983           else
2984             return (elem_len > char_len) ? elem_len : char_len;
2985         }
2986     }
2987   return 0;
2988 }
2989
2990 # ifdef _LIBC
2991 static unsigned int
2992 find_collation_sequence_value (mbs, mbs_len)
2993     const unsigned char *mbs;
2994     size_t mbs_len;
2995 {
2996   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2997   if (nrules == 0)
2998     {
2999       if (mbs_len == 1)
3000         {
3001           /* No valid character.  Match it as a single byte character.  */
3002           const unsigned char *collseq = (const unsigned char *)
3003             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3004           return collseq[mbs[0]];
3005         }
3006       return UINT_MAX;
3007     }
3008   else
3009     {
3010       int32_t idx;
3011       const unsigned char *extra = (const unsigned char *)
3012         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3013
3014       for (idx = 0; ;)
3015         {
3016           int mbs_cnt, found = 0;
3017           int32_t elem_mbs_len;
3018           /* Skip the name of collating element name.  */
3019           idx = idx + extra[idx] + 1;
3020           elem_mbs_len = extra[idx++];
3021           if (mbs_len == elem_mbs_len)
3022             {
3023               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3024                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3025                   break;
3026               if (mbs_cnt == elem_mbs_len)
3027                 /* Found the entry.  */
3028                 found = 1;
3029             }
3030           /* Skip the byte sequence of the collating element.  */
3031           idx += elem_mbs_len;
3032           /* Adjust for the alignment.  */
3033           idx = (idx + 3) & ~3;
3034           /* Skip the collation sequence value.  */
3035           idx += sizeof (uint32_t);
3036           /* Skip the wide char sequence of the collating element.  */
3037           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3038           /* If we found the entry, return the sequence value.  */
3039           if (found)
3040             return *(uint32_t *) (extra + idx);
3041           /* Skip the collation sequence value.  */
3042           idx += sizeof (uint32_t);
3043         }
3044     }
3045 }
3046 # endif /* _LIBC */
3047 #endif /* RE_ENABLE_I18N */
3048
3049 /* Check whether the node accepts the byte which is IDX-th
3050    byte of the INPUT.  */
3051
3052 static int
3053 check_node_accept (preg, node, mctx, idx)
3054     const regex_t *preg;
3055     const re_token_t *node;
3056     const re_match_context_t *mctx;
3057     int idx;
3058 {
3059   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3060   const re_token_t *cur_node;
3061   unsigned char ch;
3062   if (node->type == OP_CONTEXT_NODE)
3063     {
3064       /* The node has constraints.  Check whether the current context
3065          satisfies the constraints.  */
3066       unsigned int context = re_string_context_at (mctx->input, idx,
3067                                                    mctx->eflags,
3068                                                    preg->newline_anchor);
3069       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3070         return 0;
3071       cur_node = dfa->nodes + node->opr.ctx_info->entity;
3072     }
3073   else
3074     cur_node = node;
3075
3076   ch = re_string_byte_at (mctx->input, idx);
3077   if (cur_node->type == CHARACTER)
3078     return cur_node->opr.c == ch;
3079   else if (cur_node->type == SIMPLE_BRACKET)
3080     return bitset_contain (cur_node->opr.sbcset, ch);
3081   else if (cur_node->type == OP_PERIOD)
3082     return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
3083              || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
3084   else
3085     return 0;
3086 }
3087
3088 /* Extend the buffers, if the buffers have run out.  */
3089
3090 static reg_errcode_t
3091 extend_buffers (mctx)
3092      re_match_context_t *mctx;
3093 {
3094   reg_errcode_t ret;
3095   re_string_t *pstr = mctx->input;
3096
3097   /* Double the lengthes of the buffers.  */
3098   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3099   if (BE (ret != REG_NOERROR, 0))
3100     return ret;
3101
3102   if (mctx->state_log != NULL)
3103     {
3104       /* And double the length of state_log.  */
3105       mctx->state_log = re_realloc (mctx->state_log, re_dfastate_t *,
3106                                     pstr->bufs_len * 2);
3107       if (BE (mctx->state_log == NULL, 0))
3108         return REG_ESPACE;
3109     }
3110
3111   /* Then reconstruct the buffers.  */
3112   if (pstr->icase)
3113     {
3114 #ifdef RE_ENABLE_I18N
3115       if (MB_CUR_MAX > 1)
3116         build_wcs_upper_buffer (pstr);
3117       else
3118 #endif /* RE_ENABLE_I18N  */
3119         build_upper_buffer (pstr);
3120     }
3121   else
3122     {
3123 #ifdef RE_ENABLE_I18N
3124       if (MB_CUR_MAX > 1)
3125         build_wcs_buffer (pstr);
3126       else
3127 #endif /* RE_ENABLE_I18N  */
3128         {
3129           if (pstr->trans != NULL)
3130             re_string_translate_buffer (pstr);
3131           else
3132             pstr->valid_len = pstr->bufs_len;
3133         }
3134     }
3135   return REG_NOERROR;
3136 }
3137
3138 \f
3139 /* Functions for matching context.  */
3140
3141 static reg_errcode_t
3142 match_ctx_init (mctx, eflags, input, n)
3143     re_match_context_t *mctx;
3144     int eflags, n;
3145     re_string_t *input;
3146 {
3147   mctx->eflags = eflags;
3148   mctx->input = input;
3149   mctx->match_last = -1;
3150   if (n > 0)
3151     {
3152       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3153       if (BE (mctx->bkref_ents == NULL, 0))
3154         return REG_ESPACE;
3155     }
3156   else
3157     mctx->bkref_ents = NULL;
3158   mctx->nbkref_ents = 0;
3159   mctx->abkref_ents = n;
3160   mctx->max_mb_elem_len = 0;
3161   return REG_NOERROR;
3162 }
3163
3164 static void
3165 match_ctx_free (mctx)
3166     re_match_context_t *mctx;
3167 {
3168   re_free (mctx->bkref_ents);
3169 }
3170
3171 /* Add a new backreference entry to the cache.  */
3172
3173 static reg_errcode_t
3174 match_ctx_add_entry (mctx, node, str_idx, from, to)
3175     re_match_context_t *mctx;
3176     int node, str_idx, from, to;
3177 {
3178   if (mctx->nbkref_ents >= mctx->abkref_ents)
3179     {
3180       mctx->bkref_ents = re_realloc (mctx->bkref_ents,
3181                                      struct re_backref_cache_entry,
3182                                      mctx->abkref_ents * 2);
3183       if (BE (mctx->bkref_ents == NULL, 0))
3184         return REG_ESPACE;
3185       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
3186              sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3187       mctx->abkref_ents *= 2;
3188     }
3189   mctx->bkref_ents[mctx->nbkref_ents].node = node;
3190   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
3191   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
3192   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
3193   mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
3194   if (mctx->max_mb_elem_len < to - from)
3195     mctx->max_mb_elem_len = to - from;
3196   return REG_NOERROR;
3197 }
3198
3199 static void
3200 match_ctx_clear_flag (mctx)
3201      re_match_context_t *mctx;
3202 {
3203   int i;
3204   for (i = 0; i < mctx->nbkref_ents; ++i)
3205     {
3206       mctx->bkref_ents[i].flag = 0;
3207     }
3208 }
3209
3210 static void
3211 sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
3212                check_subexp)
3213     re_sift_context_t *sctx;
3214     re_dfastate_t **sifted_sts, **limited_sts;
3215     int last_node, last_str_idx, check_subexp;
3216 {
3217   sctx->sifted_states = sifted_sts;
3218   sctx->limited_states = limited_sts;
3219   sctx->last_node = last_node;
3220   sctx->last_str_idx = last_str_idx;
3221   sctx->check_subexp = check_subexp;
3222   re_node_set_init_empty (&sctx->limits);
3223 }