release/src/router/php/ext/pcre/pcrelib/pcre_exec.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9            Copyright (c) 1997-2012 University of Cambridge
  10
  11 -----------------------------------------------------------------------------
  12 Redistribution and use in source and binary forms, with or without
  13 modification, are permitted provided that the following conditions are met:
  14
  15     * Redistributions of source code must retain the above copyright notice,
  16       this list of conditions and the following disclaimer.
  17
  18     * Redistributions in binary form must reproduce the above copyright
  19       notice, this list of conditions and the following disclaimer in the
  20       documentation and/or other materials provided with the distribution.
  21
  22     * Neither the name of the University of Cambridge nor the names of its
  23       contributors may be used to endorse or promote products derived from
  24       this software without specific prior written permission.
  25
  26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36 POSSIBILITY OF SUCH DAMAGE.
  37 -----------------------------------------------------------------------------
  38 */
  39
  40 /* This module contains pcre_exec(), the externally visible function that does
  41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  42 possible. There are also some static supporting functions. */
  43
  44 #ifdef HAVE_CONFIG_H
  45 #include "config.h"
  46 #endif
  47
  48 #define NLBLOCK md             /* Block containing newline information */
  49 #define PSSTART start_subject  /* Field containing processed string start */
  50 #define PSEND   end_subject    /* Field containing processed string end */
  51
  52 #include "pcre_internal.h"
  53
  54 /* Undefine some potentially clashing cpp symbols */
  55
  56 #undef min
  57 #undef max
  58
  59 /* Values for setting in md->match_function_type to indicate two special types
  60 of call to match(). We do it this way to save on using another stack variable,
  61 as stack usage is to be discouraged. */
  62
  63 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
  64 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
  65
  66 /* Non-error returns from the match() function. Error returns are externally
  67 defined PCRE_ERROR_xxx codes, which are all negative. */
  68
  69 #define MATCH_MATCH        1
  70 #define MATCH_NOMATCH      0
  71
  72 /* Special internal returns from the match() function. Make them sufficiently
  73 negative to avoid the external error codes. */
  74
  75 #define MATCH_ACCEPT       (-999)
  76 #define MATCH_COMMIT       (-998)  /* From OP_COMMIT when what follows it returns NOMATCH/PRUNE/SKIP/SKIP_ARG/THEN */
  77 #define MATCH_KETRPOS      (-997)
  78 #define MATCH_ONCE         (-996)
  79 #define MATCH_PRUNE        (-995)  /* From OP_PRUNE and OP_PRUNE_ARG when what follows returns NOMATCH or THEN */
  80 #define MATCH_SKIP         (-994)  /* From OP_SKIP; md->start_match_ptr holds the subject position of the SKIP */
  81 #define MATCH_SKIP_ARG     (-993)  /* From OP_SKIP_ARG; md->start_match_ptr holds the skip name (ecode + 2) */
  82 #define MATCH_THEN         (-992)  /* From OP_THEN and OP_THEN_ARG; md->start_match_ptr holds the opcode address */
  83
  84 /* Maximum number of ints of offset to save on the stack for recursive calls.
  85 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
  86 because the offset vector is always a multiple of 3 long. */
  87
  88 #define REC_STACK_SAVE_MAX 30
  89
  90 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
  91
  92 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* Read with rep_max: {0,inf} {0,inf} {1,inf} {1,inf} {0,1} {0,1} */
  93 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* NOTE(review): presumably in the order *, *?, +, +?, ?, ?? - confirm at the use sites */
  94
  95 #ifdef PCRE_DEBUG
  96 /*************************************************
  97 *        Debugging function to print chars       *
  98 *************************************************/
  99
 100 /* Print a sequence of chars to stdout in printable format, stopping at the end
 101 of the subject if requested. Non-printing values are shown as \x{..} hex escapes.
 102
 103 Arguments:
 104   p           points to characters
 105   length      number to print
 106   is_subject  TRUE if printing from within md->start_subject
 107   md          pointer to matching data block; it must be valid even when
 108                 is_subject is FALSE, because md->utf is always read
 109 Returns:     nothing
 110 */
 111
 112 static void
 113 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
 114 {
 115 pcre_uint32 c;
 116 BOOL utf = md->utf;   /* Not referenced directly here. NOTE(review): presumably needed by RAWUCHARINCTEST - confirm */
 117 if (is_subject && length > md->end_subject - p) length = (int)(md->end_subject - p);
 118 while (length-- > 0)   /* isprint() is undefined for values beyond unsigned char range, so test the range first */
 119   if ((c = RAWUCHARINCTEST(p)) <= 0xff && isprint((int)c)) printf("%c", (char)c); else printf("\\x{%02x}", c);
 120 }
 121 #endif
 122
 123
 124
 125 /*************************************************
 126 *          Match a back-reference                *
 127 *************************************************/
 128
 129 /* Compare the subject at eptr with the text of a captured substring. If the
 130 back reference hasn't been set, the length that is passed is normally negative,
 131 so the match always fails; in JavaScript compatibility mode the length passed
 132 is zero instead. Note that in caseless UTF-8 mode, the number of subject bytes
 133 matched may be different to the number of reference bytes.
 134
 135 Arguments:
 136   offset      index into md->offset_vector of the reference's start offset
 137   eptr        pointer into the subject
 138   length      length of reference to be matched (number of bytes)
 139   md          points to match data block
 140   caseless    TRUE if caseless
 141 Returns:      >= 0 the number of subject bytes matched
 142               -1 no match (including a negative length, i.e. reference not set)
 143               -2 partial match: the subject ended before the reference did
 144 */
 145
 146 static int
 147 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
 148   BOOL caseless)
 149 {
 150 PCRE_PUCHAR eptr_start = eptr;   /* Kept so the consumed length can be computed at the end */
 151 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];   /* Start of the reference text */
 152 #ifdef SUPPORT_UTF
 153 BOOL utf = md->utf;
 154 #endif
 155
 156 #ifdef PCRE_DEBUG
 157 if (eptr >= md->end_subject)
 158   printf("matching subject <null>");
 159 else
 160   {
 161   printf("matching subject ");
 162   pchars(eptr, length, TRUE, md);
 163   }
 164 printf(" against backref ");
 165 pchars(p, length, FALSE, md);
 166 printf("\n");
 167 #endif
 168
 169 /* Always fail if reference not set (and not JavaScript compatible - in that
 170 case the length is passed as zero). */
 171
 172 if (length < 0) return -1;
 173
 174 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
 175 properly if Unicode properties are supported. Otherwise, we can check only
 176 ASCII characters. */
 177
 178 if (caseless)
 179   {
 180 #ifdef SUPPORT_UTF
 181 #ifdef SUPPORT_UCP
 182   if (utf)
 183     {
 184     /* Match characters up to the end of the reference. NOTE: the number of
 185     data units matched may differ, because in UTF-8 there are some characters
 186     whose upper and lower case versions have different numbers of bytes.
 187     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
 188     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
 189     sequence of two of the latter. It is important, therefore, to check the
 190     length along the reference, not along the subject (earlier code did this
 191     wrong). */
 192
 193     PCRE_PUCHAR endptr = p + length;
 194     while (p < endptr)
 195       {
 196       pcre_uint32 c, d;
 197       const ucd_record *ur;
 198       if (eptr >= md->end_subject) return -2;   /* Partial match */
 199       GETCHARINC(c, eptr);
 200       GETCHARINC(d, p);
 201       ur = GET_UCD(d);
 202       if (c != d && c != d + ur->other_case)   /* Neither identical nor d plus its other_case offset */
 203         {
 204         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;   /* Fall back to the caseless set recorded for d */
 205         for (;;)
 206           {
 207           if (c < *pp) return -1;   /* Entry already exceeds c: no match. NOTE(review): relies on sorted, sentinel-terminated sets - confirm */
 208           if (c == *pp++) break;   /* c is listed in d's set: accept and go on to the next character */
 209           }
 210         }
 211       }
 212     }
 213   else
 214 #endif
 215 #endif
 216
 217   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
 218   is no UCP support. */
 219     {
 220     while (length-- > 0)
 221       {
 222       pcre_uchar cc, cp;
 223       if (eptr >= md->end_subject) return -2;   /* Partial match */
 224       cc = RAWUCHARTEST(eptr);
 225       cp = RAWUCHARTEST(p);
 226       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;   /* Compare the md->lcc mappings; NOTE(review): presumably the third argument is the out-of-table default - confirm */
 227       p++;
 228       eptr++;
 229       }
 230     }
 231   }
 232
 233 /* In the caseful case, we can just compare the bytes, whether or not we
 234 are in UTF-8 mode. */
 235
 236 else
 237   {
 238   while (length-- > 0)
 239     {
 240     if (eptr >= md->end_subject) return -2;   /* Partial match */
 241     if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
 242     }
 243   }
 244
 245 return (int)(eptr - eptr_start);   /* Measured along the subject, so it may differ from the reference length */
 246 }
 247
 248
 249
 250 /***************************************************************************
 251 ****************************************************************************
 252                    RECURSION IN THE match() FUNCTION
 253
 254 The match() function is highly recursive, though not every recursive call
 255 increases the recursive depth. Nevertheless, some regular expressions can cause
 256 it to recurse to a great depth. I was writing for Unix, so I just let it call
 257 itself recursively. This uses the stack for saving everything that has to be
 258 saved for a recursive call. On Unix, the stack can be large, and this works
 259 fine.
 260
 261 It turns out that on some non-Unix-like systems there are problems with
 262 programs that use a lot of stack. (This despite the fact that every last chip
 263 has oodles of memory these days, and techniques for extending the stack have
 264 been known for decades.) So....
 265
 266 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 267 calls by keeping local variables that need to be preserved in blocks of memory
 268 obtained from malloc() instead of on the stack. Macros are used to
 269 achieve this so that the actual code doesn't look very different to what it
 270 always used to.
 271
 272 The original heap-recursive code used longjmp(). However, it seems that this
 273 can be very slow on some operating systems. Following a suggestion from Stan
 274 Switzer, the use of longjmp() has been abolished, at the cost of having to
 275 provide a unique number for each call to RMATCH. There is no way of generating
 276 a sequence of numbers at compile time in C. I have given them names, to make
 277 them stand out more clearly.
 278
 279 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 280 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 281 tests. Furthermore, not using longjmp() means that local dynamic variables
 282 don't have indeterminate values; this has meant that the frame size can be
 283 reduced because the result can be "passed back" by straight setting of the
 284 variable instead of being passed in the frame.
 285 ****************************************************************************
 286 ***************************************************************************/
 287
 288 /* Numbers for RMATCH calls: each call passes one as its last argument, and the
 289 NO_RECURSE RMATCH turns it into the resume label L_RMn. When this list is changed, the code at HEAP_RETURN below must be updated in sync.  */
 290
 291 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 292        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
 293        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
 294        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
 295        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
 296        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
 297        RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };   /* Values run consecutively from 1 to 67 */
 298
 299 /* These versions of the macros use the stack, as normal: RMATCH is a real call
 300 of match() at depth rdepth+1 whose result is left in rrc, and RRETURN is a plain
 301 return. The PCRE_DEBUG versions also trace with printf. "rw" isn't used here. */
 302
 303 #ifndef NO_RECURSE
 304 #define REGISTER register
 305
 306 #ifdef PCRE_DEBUG
 307 #define RMATCH(ra,rb,rc,rd,re,rw) \
 308   { \
 309   printf("match() called in line %d\n", __LINE__); \
 310   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
 311   printf("to line %d\n", __LINE__); \
 312   }
 313 #define RRETURN(ra) \
 314   { \
 315   printf("match() returned %d from line %d\n", ra, __LINE__); \
 316   return ra; \
 317   }
 318 #else
 319 #define RMATCH(ra,rb,rc,rd,re,rw) \
 320   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
 321 #define RRETURN(ra) return ra
 322 #endif
 323
 324 #else
 325
 326
 327 /* These versions of the macros manage a private stack of heapframes. RMATCH
 328 takes frame->Xnextframe or, if NULL, a new frame from PUBL(stack_malloc), and
 329 RRETURN steps back without freeing. "rd" is unused: it's md, which never changes. */
 330
 331 #define REGISTER   /* Empty: match()'s parameters are not declared "register" in this build */
 332
 333 #define RMATCH(ra,rb,rc,rd,re,rw)\
 334   {\
 335   heapframe *newframe = frame->Xnextframe;\
 336   if (newframe == NULL)\
 337     {\
 338     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
 339     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
 340     newframe->Xnextframe = NULL;\
 341     frame->Xnextframe = newframe;\
 342     }\
 343   frame->Xwhere = rw;\
 344   newframe->Xeptr = ra;\
 345   newframe->Xecode = rb;\
 346   newframe->Xmstart = mstart;\
 347   newframe->Xoffset_top = rc;\
 348   newframe->Xeptrb = re;\
 349   newframe->Xrdepth = frame->Xrdepth + 1;\
 350   newframe->Xprevframe = frame;\
 351   frame = newframe;\
 352   DPRINTF(("restarting from line %d\n", __LINE__));\
 353   goto HEAP_RECURSE;\
 354   L_##rw:\
 355   DPRINTF(("jumped back to line %d\n", __LINE__));\
 356   }
 357
 358 #define RRETURN(ra)\
 359   {\
 360   heapframe *oldframe = frame;\
 361   frame = oldframe->Xprevframe;\
 362   if (frame != NULL)\
 363     {\
 364     rrc = ra;\
 365     goto HEAP_RETURN;\
 366     }\
 367   return ra;\
 368   }
 369
 370
 371 /* Structure for remembering the arguments and local variables of one "call" of match() in a private frame (NO_RECURSE build) */
 372
 373 typedef struct heapframe {
 374   struct heapframe *Xprevframe;   /* Frame that RRETURN goes back to; NULL makes RRETURN really return */
 375   struct heapframe *Xnextframe;   /* Frame kept for reuse by the next RMATCH; NULL until one is allocated */
 376
 377   /* Function arguments that may change */
 378
 379   PCRE_PUCHAR Xeptr;
 380   const pcre_uchar *Xecode;
 381   PCRE_PUCHAR Xmstart;
 382   int Xoffset_top;
 383   eptrblock *Xeptrb;
 384   unsigned int Xrdepth;   /* RMATCH sets this to the calling frame's Xrdepth + 1 */
 385
 386   /* Function local variables */
 387
 388   PCRE_PUCHAR Xcallpat;
 389 #ifdef SUPPORT_UTF
 390   PCRE_PUCHAR Xcharptr;
 391 #endif
 392   PCRE_PUCHAR Xdata;   /* Also used under the name save_mark */
 393   PCRE_PUCHAR Xnext;
 394   PCRE_PUCHAR Xpp;
 395   PCRE_PUCHAR Xprev;
 396   PCRE_PUCHAR Xsaved_eptr;
 397
 398   recursion_info Xnew_recursive;
 399
 400   BOOL Xcur_is_word;   /* Also used under the name allow_zero */
 401   BOOL Xcondition;   /* Also used under the names cbegroup and condassert */
 402   BOOL Xprev_is_word;   /* Also used under the name matched_once */
 403
 404 #ifdef SUPPORT_UCP
 405   int Xprop_type;
 406   unsigned int Xprop_value;
 407   int Xprop_fail_result;
 408   int Xoclength;
 409   pcre_uchar Xocchars[6];
 410 #endif
 411
 412   int Xcodelink;   /* Also used under the name code_offset */
 413   int Xctype;
 414   unsigned int Xfc;   /* Separate from c here; the recursive build defines fc as c */
 415   int Xfi;   /* Separate from i here; the recursive build defines fi as i */
 416   int Xlength;
 417   int Xmax;
 418   int Xmin;
 419   int Xnumber;   /* Also used as foc. NOTE(review): the recursive build declares number as unsigned int - confirm the signedness difference is intended */
 420   int Xoffset;
 421   int Xop;   /* NOTE(review): the recursive build declares op as pcre_uchar - confirm the wider type here is intended */
 422   int Xsave_capture_last;
 423   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
 424   int Xstacksave[REC_STACK_SAVE_MAX];
 425
 426   eptrblock Xnewptrb;
 427
 428   /* Where to jump back to */
 429
 430   int Xwhere;   /* The RMn number that RMATCH stores in the calling frame before switching frames */
 431
 432 } heapframe;
 433
 434 #endif
 435
 436
 437 /***************************************************************************
 438 ***************************************************************************/
 439
 440
 441
 442 /*************************************************
 443 *         Match from current position            *
 444 *************************************************/
 445
 446 /* This function is called recursively in many circumstances. Whenever it
 447 returns a negative (error) response, the outer incarnation must also return the
 448 same response. */
 449
 450 /* These macros pack up tests that are used for partial matching, and which
 451 appear several times in the code. We set the "hit end" flag if the pointer is
 452 at the end of the subject and also past the start of the subject (i.e.
 453 something has been matched). For hard partial matching, we then return
 454 immediately. The second one is used when we already know we are past the end of
 455 the subject. */
 456
 457 #define CHECK_PARTIAL()\
 458   if (md->partial != 0 && eptr >= md->end_subject && \
 459       eptr > md->start_used_ptr) \
 460     { \
 461     md->hitend = TRUE; \
 462     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
 463     }
 464
 465 #define SCHECK_PARTIAL()\
 466   if (md->partial != 0 && eptr > md->start_used_ptr) \
 467     { \
 468     md->hitend = TRUE; \
 469     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
 470     }
 471
 472
 473 /* Performance note: It might be tempting to extract commonly used fields from
 474 the md structure (e.g. utf, end_subject) into individual variables to improve
 475 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 476 made performance worse.
 477
 478 Arguments:
 479    eptr        pointer to current character in subject
 480    ecode       pointer to current position in compiled code
 481    mstart      pointer to the current match start position (can be modified
 482                  by encountering \K)
 483    offset_top  current top pointer
 484    md          pointer to "static" info for the match
 485    eptrb       pointer to chain of blocks containing eptr at start of
 486                  brackets - for testing for empty matches
 487    rdepth      the recursion depth
 488
 489 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 490                MATCH_NOMATCH if failed to match  )
 491                a negative MATCH_xxx value for PRUNE, SKIP, etc
 492                a negative PCRE_ERROR_xxx value if aborted by an error condition
 493                  (e.g. stopped by repeated call or recursion limit)
 494 */
 495
 496 static int
 497 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
 498   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
 499   unsigned int rdepth)
 500 {
 501 /* These variables do not need to be preserved over recursion in this function,
 502 so they can be ordinary variables in all cases. Mark some of them with
 503 "register" because they are used a lot in loops. */
 504
 505 register int  rrc;         /* Returns from recursive calls */
 506 register int  i;           /* Used for loops not involving calls to RMATCH() */
 507 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
 508 register BOOL utf;         /* Local copy of UTF flag for speed */
 509
 510 BOOL minimize, possessive; /* Quantifier options */
 511 BOOL caseless;
 512 int condcode;
 513
 514 /* When recursion is not being used, all "local" variables that have to be
 515 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
 516 frame on the stack here; subsequent instantiations are obtained from the heap
 517 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
 518 the top-level on the stack rather than malloc-ing them all gives a performance
 519 boost in many cases where there is not much "recursion". */
 520
 521 #ifdef NO_RECURSE
 522 heapframe *frame = (heapframe *)md->match_frames_base;
 523
 524 /* Copy in the original argument variables */
 525
 526 frame->Xeptr = eptr;
 527 frame->Xecode = ecode;
 528 frame->Xmstart = mstart;
 529 frame->Xoffset_top = offset_top;
 530 frame->Xeptrb = eptrb;
 531 frame->Xrdepth = rdepth;
 532
 533 /* This is where control jumps back to to effect "recursion" */
 534
 535 HEAP_RECURSE:
 536
 537 /* Macros make the argument variables come from the current frame */
 538
 539 #define eptr               frame->Xeptr
 540 #define ecode              frame->Xecode
 541 #define mstart             frame->Xmstart
 542 #define offset_top         frame->Xoffset_top
 543 #define eptrb              frame->Xeptrb
 544 #define rdepth             frame->Xrdepth
 545
 546 /* Ditto for the local variables */
 547
 548 #ifdef SUPPORT_UTF
 549 #define charptr            frame->Xcharptr
 550 #endif
 551 #define callpat            frame->Xcallpat
 552 #define codelink           frame->Xcodelink
 553 #define data               frame->Xdata
 554 #define next               frame->Xnext
 555 #define pp                 frame->Xpp
 556 #define prev               frame->Xprev
 557 #define saved_eptr         frame->Xsaved_eptr
 558
 559 #define new_recursive      frame->Xnew_recursive
 560
 561 #define cur_is_word        frame->Xcur_is_word
 562 #define condition          frame->Xcondition
 563 #define prev_is_word       frame->Xprev_is_word
 564
 565 #ifdef SUPPORT_UCP
 566 #define prop_type          frame->Xprop_type
 567 #define prop_value         frame->Xprop_value
 568 #define prop_fail_result   frame->Xprop_fail_result
 569 #define oclength           frame->Xoclength
 570 #define occhars            frame->Xocchars
 571 #endif
 572
 573 #define ctype              frame->Xctype
 574 #define fc                 frame->Xfc
 575 #define fi                 frame->Xfi
 576 #define length             frame->Xlength
 577 #define max                frame->Xmax
 578 #define min                frame->Xmin
 579 #define number             frame->Xnumber
 580 #define offset             frame->Xoffset
 581 #define op                 frame->Xop
 582 #define save_capture_last  frame->Xsave_capture_last
 583 #define save_offset1       frame->Xsave_offset1
 584 #define save_offset2       frame->Xsave_offset2
 585 #define save_offset3       frame->Xsave_offset3
 586 #define stacksave          frame->Xstacksave
 587
 588 #define newptrb            frame->Xnewptrb
 589
 590 /* When recursion is being used, local variables are allocated on the stack and
 591 get preserved during recursion in the normal way. In this environment, fi and
 592 i, and fc and c, can be the same variables. */
 593
 594 #else         /* NO_RECURSE not defined */
 595 #define fi i
 596 #define fc c
 597
 598 /* Many of the following variables are used only in small blocks of the code.
 599 My normal style of coding would have declared them within each of those blocks.
 600 However, in order to accommodate the version of this code that uses an external
 601 "stack" implemented on the heap, it is easier to declare them all here, so the
 602 declarations can be cut out in a block. The only declarations within blocks
 603 below are for variables that do not have to be preserved over a recursive call
 604 to RMATCH(). */
 605
 606 #ifdef SUPPORT_UTF
 607 const pcre_uchar *charptr;
 608 #endif
 609 const pcre_uchar *callpat;
 610 const pcre_uchar *data;
 611 const pcre_uchar *next;
 612 PCRE_PUCHAR       pp;
 613 const pcre_uchar *prev;
 614 PCRE_PUCHAR       saved_eptr;
 615
 616 recursion_info new_recursive;
 617
 618 BOOL cur_is_word;
 619 BOOL condition;
 620 BOOL prev_is_word;
 621
 622 #ifdef SUPPORT_UCP
 623 int prop_type;
 624 unsigned int prop_value;
 625 int prop_fail_result;
 626 int oclength;
 627 pcre_uchar occhars[6];
 628 #endif
 629
 630 int codelink;
 631 int ctype;
 632 int length;
 633 int max;
 634 int min;
 635 unsigned int number;
 636 int offset;
 637 pcre_uchar op;
 638 int save_capture_last;
 639 int save_offset1, save_offset2, save_offset3;
 640 int stacksave[REC_STACK_SAVE_MAX];
 641
 642 eptrblock newptrb;
 643
 644 /* There is a special fudge for calling match() in a way that causes it to
 645 measure the size of its basic stack frame when the stack is being used for
 646 recursion. The second argument (ecode) being NULL triggers this behaviour. It
 647 cannot normally ever be NULL. The return is the negated value of the frame
 648 size. */
 649
 650 if (ecode == NULL)
 651   {
 652   if (rdepth == 0)
 653     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
 654   else
 655     {
 656     int len = (char *)&rdepth - (char *)eptr;
 657     return (len > 0)? -len : len;
 658     }
 659   }
 660 #endif     /* NO_RECURSE */
 661
 662 /* To save space on the stack and in the heap frame, I have doubled up on some
 663 of the local variables that are used only in localised parts of the code, but
 664 still need to be preserved over recursive calls of match(). These macros define
 665 the alternative names that are used. */
 666
 667 #define allow_zero    cur_is_word
 668 #define cbegroup      condition
 669 #define code_offset   codelink
 670 #define condassert    condition
 671 #define matched_once  prev_is_word
 672 #define foc           number
 673 #define save_mark     data
 674
 675 /* These statements are here to stop the compiler complaining about uninitialized
 676 variables. */
 677
 678 #ifdef SUPPORT_UCP
 679 prop_value = 0;
 680 prop_fail_result = 0;
 681 #endif
 682
 683
 684 /* This label is used for tail recursion, which is used in a few cases even
 685 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 686 used. Thanks to Ian Taylor for noticing this possibility and sending the
 687 original patch. */
 688
 689 TAIL_RECURSE:
 690
 691 /* OK, now we can get on with the real code of the function. Recursive calls
 692 are specified by the macro RMATCH and RRETURN is used to return. When
 693 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 694 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
 695 defined). However, RMATCH isn't like a function call because it's quite a
 696 complicated macro. It has to be used in one particular way. This shouldn't,
 697 however, impact performance when true recursion is being used. */
 698
 699 #ifdef SUPPORT_UTF
 700 utf = md->utf;       /* Local copy of the flag */
 701 #else
 702 utf = FALSE;
 703 #endif
 704
 705 /* First check that we haven't called match() too many times, or that we
 706 haven't exceeded the recursive call limit. */
 707
 708 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 709 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 710
 711 /* At the start of a group with an unlimited repeat that may match an empty
 712 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
 713 done this way to save having to use another function argument, which would take
 714 up space on the stack. See also MATCH_CONDASSERT below.
 715
 716 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
 717 such remembered pointers, to be checked when we hit the closing ket, in order
 718 to break infinite loops that match no characters. When match() is called in
 719 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
 720 NOT be used with tail recursion, because the memory block that is used is on
 721 the stack, so a new one may be required for each match(). */
 722
 723 if (md->match_function_type == MATCH_CBEGROUP)
 724   {
 725   newptrb.epb_saved_eptr = eptr;
 726   newptrb.epb_prev = eptrb;
 727   eptrb = &newptrb;
 728   md->match_function_type = 0;
 729   }
 730
 731 /* Now start processing the opcodes. */
 732
 733 for (;;)
 734   {
 735   minimize = possessive = FALSE;
 736   op = *ecode;
 737
 738   switch(op)
 739     {
 740     case OP_MARK:
 741     md->nomatch_mark = ecode + 2;
 742     md->mark = NULL;    /* In case previously set by assertion */
 743     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 744       eptrb, RM55);
 745     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 746          md->mark == NULL) md->mark = ecode + 2;
 747
 748     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
 749     argument, and we must check whether that argument matches this MARK's
 750     argument. It is passed back in md->start_match_ptr (an overloading of that
 751     variable). If it does match, we reset that variable to the current subject
 752     position and return MATCH_SKIP. Otherwise, pass back the return code
 753     unaltered. */
 754
 755     else if (rrc == MATCH_SKIP_ARG &&
 756         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
 757       {
 758       md->start_match_ptr = eptr;
 759       RRETURN(MATCH_SKIP);
 760       }
 761     RRETURN(rrc);
 762
 763     case OP_FAIL:
 764     RRETURN(MATCH_NOMATCH);
 765
 766     /* COMMIT overrides PRUNE, SKIP, and THEN */
 767
 768     case OP_COMMIT:
 769     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 770       eptrb, RM52);
 771     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
 772         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
 773         rrc != MATCH_THEN)
 774       RRETURN(rrc);
 775     RRETURN(MATCH_COMMIT);
 776
 777     /* PRUNE overrides THEN */
 778
 779     case OP_PRUNE:
 780     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 781       eptrb, RM51);
 782     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 783     RRETURN(MATCH_PRUNE);
 784
 785     case OP_PRUNE_ARG:
 786     md->nomatch_mark = ecode + 2;
 787     md->mark = NULL;    /* In case previously set by assertion */
 788     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 789       eptrb, RM56);
 790     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 791          md->mark == NULL) md->mark = ecode + 2;
 792     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 793     RRETURN(MATCH_PRUNE);
 794
 795     /* SKIP overrides PRUNE and THEN */
 796
 797     case OP_SKIP:
 798     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 799       eptrb, RM53);
 800     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 801       RRETURN(rrc);
 802     md->start_match_ptr = eptr;   /* Pass back current position */
 803     RRETURN(MATCH_SKIP);
 804
 805     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
 806     nomatch_mark. There is a flag that disables this opcode when re-matching a
 807     pattern that ended with a SKIP for which there was not a matching MARK. */
 808
 809     case OP_SKIP_ARG:
 810     if (md->ignore_skip_arg)
 811       {
 812       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
 813       break;
 814       }
 815     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
 816       eptrb, RM57);
 817     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
 818       RRETURN(rrc);
 819
 820     /* Pass back the current skip name by overloading md->start_match_ptr and
 821     returning the special MATCH_SKIP_ARG return code. This will either be
 822     caught by a matching MARK, or get to the top, where it causes a rematch
 823     with the md->ignore_skip_arg flag set. */
 824
 825     md->start_match_ptr = ecode + 2;
 826     RRETURN(MATCH_SKIP_ARG);
 827
 828     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
 829     the branch in which it occurs can be determined. Overload the start of
 830     match pointer to do this. */
 831
 832     case OP_THEN:
 833     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 834       eptrb, RM54);
 835     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 836     md->start_match_ptr = ecode;
 837     RRETURN(MATCH_THEN);
 838
 839     case OP_THEN_ARG:
 840     md->nomatch_mark = ecode + 2;
 841     md->mark = NULL;    /* In case previously set by assertion */
 842     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
 843       md, eptrb, RM58);
 844     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
 845          md->mark == NULL) md->mark = ecode + 2;
 846     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 847     md->start_match_ptr = ecode;
 848     RRETURN(MATCH_THEN);
 849
 850     /* Handle an atomic group that does not contain any capturing parentheses.
 851     This can be handled like an assertion. Prior to 8.13, all atomic groups
 852     were handled this way. In 8.13, the code was changed as below for ONCE, so
 853     that backups pass through the group and thereby reset captured values.
 854     However, this uses a lot more stack, so in 8.20, atomic groups that do not
 855     contain any captures generate OP_ONCE_NC, which can be handled in the old,
 856     less stack intensive way.
 857
 858     Check the alternative branches in turn - the matching won't pass the KET
 859     for this kind of subpattern. If any one branch matches, we carry on as at
 860     the end of a normal bracket, leaving the subject pointer, but resetting
 861     the start-of-match value in case it was changed by \K. */
 862
 863     case OP_ONCE_NC:
 864     prev = ecode;
 865     saved_eptr = eptr;
 866     save_mark = md->mark;
 867     do
 868       {
 869       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
 870       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
 871         {
 872         mstart = md->start_match_ptr;
 873         break;
 874         }
 875       if (rrc == MATCH_THEN)
 876         {
 877         next = ecode + GET(ecode,1);
 878         if (md->start_match_ptr < next &&
 879             (*ecode == OP_ALT || *next == OP_ALT))
 880           rrc = MATCH_NOMATCH;
 881         }
 882
 883       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 884       ecode += GET(ecode,1);
 885       md->mark = save_mark;
 886       }
 887     while (*ecode == OP_ALT);
 888
 889     /* If hit the end of the group (which could be repeated), fail */
 890
 891     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
 892
 893     /* Continue as from after the group, updating the offsets high water
 894     mark, since extracts may have been taken. */
 895
 896     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
 897
 898     offset_top = md->end_offset_top;
 899     eptr = md->end_match_ptr;
 900
 901     /* For a non-repeating ket, just continue at this level. This also
 902     happens for a repeating ket if no characters were matched in the group.
 903     This is the forcible breaking of infinite loops as implemented in Perl
 904     5.005. */
 905
 906     if (*ecode == OP_KET || eptr == saved_eptr)
 907       {
 908       ecode += 1+LINK_SIZE;
 909       break;
 910       }
 911
 912     /* The repeating kets try the rest of the pattern or restart from the
 913     preceding bracket, in the appropriate order. The second "call" of match()
 914     uses tail recursion, to avoid using another stack frame. */
 915
 916     if (*ecode == OP_KETRMIN)
 917       {
 918       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
 919       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 920       ecode = prev;
 921       goto TAIL_RECURSE;
 922       }
 923     else  /* OP_KETRMAX */
 924       {
 925       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
 926       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 927       ecode += 1 + LINK_SIZE;
 928       goto TAIL_RECURSE;
 929       }
 930     /* Control never gets here */
 931
 932     /* Handle a capturing bracket, other than those that are possessive with an
 933     unlimited repeat. If there is space in the offset vector, save the current
 934     subject position in the working slot at the top of the vector. We mustn't
 935     change the current values of the data slot, because they may be set from a
 936     previous iteration of this group, and be referred to by a reference inside
 937     the group. A failure to match might occur after the group has succeeded,
 938     if something later on doesn't match. For this reason, we need to restore
 939     the working value and also the values of the final offsets, in case they
 940     were set by a previous iteration of the same bracket.
 941
 942     If there isn't enough space in the offset vector, treat this as if it were
 943     a non-capturing bracket. Don't worry about setting the flag for the error
 944     case here; that is handled in the code for KET. */
 945
 946     case OP_CBRA:
 947     case OP_SCBRA:
 948     number = GET2(ecode, 1+LINK_SIZE);
 949     offset = number << 1;
 950
 951 #ifdef PCRE_DEBUG
 952     printf("start bracket %d\n", number);
 953     printf("subject=");
 954     pchars(eptr, 16, TRUE, md);
 955     printf("\n");
 956 #endif
 957
 958     if (offset < md->offset_max)
 959       {
 960       save_offset1 = md->offset_vector[offset];
 961       save_offset2 = md->offset_vector[offset+1];
 962       save_offset3 = md->offset_vector[md->offset_end - number];
 963       save_capture_last = md->capture_last;
 964       save_mark = md->mark;
 965
 966       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 967       md->offset_vector[md->offset_end - number] =
 968         (int)(eptr - md->start_subject);
 969
 970       for (;;)
 971         {
 972         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
 973         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
 974           eptrb, RM1);
 975         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
 976
 977         /* If we backed up to a THEN, check whether it is within the current
 978         branch by comparing the address of the THEN that is passed back with
 979         the end of the branch. If it is within the current branch, and the
 980         branch is one of two or more alternatives (it either starts or ends
 981         with OP_ALT), we have reached the limit of THEN's action, so convert
 982         the return code to NOMATCH, which will cause normal backtracking to
 983         happen from now on. Otherwise, THEN is passed back to an outer
 984         alternative. This implements Perl's treatment of parenthesized groups,
 985         where a group not containing | does not affect the current alternative,
 986         that is, (X) is NOT the same as (X|(*F)). */
 987
 988         if (rrc == MATCH_THEN)
 989           {
 990           next = ecode + GET(ecode,1);
 991           if (md->start_match_ptr < next &&
 992               (*ecode == OP_ALT || *next == OP_ALT))
 993             rrc = MATCH_NOMATCH;
 994           }
 995
 996         /* Anything other than NOMATCH is passed back. */
 997
 998         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 999         md->capture_last = save_capture_last;
1000         ecode += GET(ecode, 1);
1001         md->mark = save_mark;
1002         if (*ecode != OP_ALT) break;
1003         }
1004
1005       DPRINTF(("bracket %d failed\n", number));
1006       md->offset_vector[offset] = save_offset1;
1007       md->offset_vector[offset+1] = save_offset2;
1008       md->offset_vector[md->offset_end - number] = save_offset3;
1009
1010       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1011
1012       RRETURN(rrc);
1013       }
1014
1015     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1016     as a non-capturing bracket. */
1017
1018     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1019     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1020
1021     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1022
1023     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1024     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1025
1026     /* Non-capturing or atomic group, except for possessive with unlimited
1027     repeat and ONCE group with no captures. Loop for all the alternatives.
1028
1029     When we get to the final alternative within the brackets, we used to return
1030     the result of a recursive call to match() whatever happened so it was
1031     possible to reduce stack usage by turning this into a tail recursion,
1032     except in the case of a possibly empty group. However, now that there is
1033     the possibility of (*THEN) occurring in the final alternative, this
1034     optimization is no longer always possible.
1035
1036     We can optimize if we know there are no (*THEN)s in the pattern; at present
1037     this is the best that can be done.
1038
1039     MATCH_ONCE is returned when the end of an atomic group is successfully
1040     reached, but subsequent matching fails. It passes back up the tree (causing
1041     captured values to be reset) until the original atomic group level is
1042     reached. This is tested by comparing md->once_target with the start of the
1043     group. At this point, the return is converted into MATCH_NOMATCH so that
1044     previous backup points can be taken. */
1045
1046     case OP_ONCE:
1047     case OP_BRA:
1048     case OP_SBRA:
1049     DPRINTF(("start non-capturing bracket\n"));
1050
1051     for (;;)
1052       {
1053       if (op >= OP_SBRA || op == OP_ONCE)
1054         md->match_function_type = MATCH_CBEGROUP;
1055
1056       /* If this is not a possibly empty group, and there are no (*THEN)s in
1057       the pattern, and this is the final alternative, optimize as described
1058       above. */
1059
1060       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1061         {
1062         ecode += PRIV(OP_lengths)[*ecode];
1063         goto TAIL_RECURSE;
1064         }
1065
1066       /* In all other cases, we have to make another call to match(). */
1067
1068       save_mark = md->mark;
1069       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1070         RM2);
1071
1072       /* See comment in the code for capturing groups above about handling
1073       THEN. */
1074
1075       if (rrc == MATCH_THEN)
1076         {
1077         next = ecode + GET(ecode,1);
1078         if (md->start_match_ptr < next &&
1079             (*ecode == OP_ALT || *next == OP_ALT))
1080           rrc = MATCH_NOMATCH;
1081         }
1082
1083       if (rrc != MATCH_NOMATCH)
1084         {
1085         if (rrc == MATCH_ONCE)
1086           {
1087           const pcre_uchar *scode = ecode;
1088           if (*scode != OP_ONCE)           /* If not at start, find it */
1089             {
1090             while (*scode == OP_ALT) scode += GET(scode, 1);
1091             scode -= GET(scode, 1);
1092             }
1093           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1094           }
1095         RRETURN(rrc);
1096         }
1097       ecode += GET(ecode, 1);
1098       md->mark = save_mark;
1099       if (*ecode != OP_ALT) break;
1100       }
1101
1102     RRETURN(MATCH_NOMATCH);
1103
1104     /* Handle possessive capturing brackets with an unlimited repeat. We come
1105     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1106     handled similarly to the normal case above. However, the matching is
1107     different. The end of these brackets will always be OP_KETRPOS, which
1108     returns MATCH_KETRPOS without going further in the pattern. By this means
1109     we can handle the group by iteration rather than recursion, thereby
1110     reducing the amount of stack needed. */
1111
1112     case OP_CBRAPOS:
1113     case OP_SCBRAPOS:
1114     allow_zero = FALSE;
1115
1116     POSSESSIVE_CAPTURE:
1117     number = GET2(ecode, 1+LINK_SIZE);
1118     offset = number << 1;
1119
1120 #ifdef PCRE_DEBUG
1121     printf("start possessive bracket %d\n", number);
1122     printf("subject=");
1123     pchars(eptr, 16, TRUE, md);
1124     printf("\n");
1125 #endif
1126
1127     if (offset < md->offset_max)
1128       {
1129       matched_once = FALSE;
1130       code_offset = (int)(ecode - md->start_code);
1131
1132       save_offset1 = md->offset_vector[offset];
1133       save_offset2 = md->offset_vector[offset+1];
1134       save_offset3 = md->offset_vector[md->offset_end - number];
1135       save_capture_last = md->capture_last;
1136
1137       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1138
1139       /* Each time round the loop, save the current subject position for use
1140       when the group matches. For MATCH_MATCH, the group has matched, so we
1141       restart it with a new subject starting position, remembering that we had
1142       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1143       usual. If we haven't matched any alternatives in any iteration, check to
1144       see if a previous iteration matched. If so, the group has matched;
1145       continue from afterwards. Otherwise it has failed; restore the previous
1146       capture values before returning NOMATCH. */
1147
1148       for (;;)
1149         {
1150         md->offset_vector[md->offset_end - number] =
1151           (int)(eptr - md->start_subject);
1152         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1153         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1154           eptrb, RM63);
1155         if (rrc == MATCH_KETRPOS)
1156           {
1157           offset_top = md->end_offset_top;
1158           eptr = md->end_match_ptr;
1159           ecode = md->start_code + code_offset;
1160           save_capture_last = md->capture_last;
1161           matched_once = TRUE;
1162           continue;
1163           }
1164
1165         /* See comment in the code for capturing groups above about handling
1166         THEN. */
1167
1168         if (rrc == MATCH_THEN)
1169           {
1170           next = ecode + GET(ecode,1);
1171           if (md->start_match_ptr < next &&
1172               (*ecode == OP_ALT || *next == OP_ALT))
1173             rrc = MATCH_NOMATCH;
1174           }
1175
1176         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1177         md->capture_last = save_capture_last;
1178         ecode += GET(ecode, 1);
1179         if (*ecode != OP_ALT) break;
1180         }
1181
1182       if (!matched_once)
1183         {
1184         md->offset_vector[offset] = save_offset1;
1185         md->offset_vector[offset+1] = save_offset2;
1186         md->offset_vector[md->offset_end - number] = save_offset3;
1187         }
1188
1189       if (allow_zero || matched_once)
1190         {
1191         ecode += 1 + LINK_SIZE;
1192         break;
1193         }
1194
1195       RRETURN(MATCH_NOMATCH);
1196       }
1197
1198     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1199     as a non-capturing bracket. */
1200
1201     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1202     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1203
1204     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1205
1206     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1207     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1208
1209     /* Non-capturing possessive bracket with unlimited repeat. We come here
1210     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1211     without the capturing complication. It is written out separately for speed
1212     and cleanliness. */
1213
1214     case OP_BRAPOS:
1215     case OP_SBRAPOS:
1216     allow_zero = FALSE;
1217
1218     POSSESSIVE_NON_CAPTURE:
1219     matched_once = FALSE;
1220     code_offset = (int)(ecode - md->start_code);
1221
1222     for (;;)
1223       {
1224       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1225       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1226         eptrb, RM48);
1227       if (rrc == MATCH_KETRPOS)
1228         {
1229         offset_top = md->end_offset_top;
1230         eptr = md->end_match_ptr;
1231         ecode = md->start_code + code_offset;
1232         matched_once = TRUE;
1233         continue;
1234         }
1235
1236       /* See comment in the code for capturing groups above about handling
1237       THEN. */
1238
1239       if (rrc == MATCH_THEN)
1240         {
1241         next = ecode + GET(ecode,1);
1242         if (md->start_match_ptr < next &&
1243             (*ecode == OP_ALT || *next == OP_ALT))
1244           rrc = MATCH_NOMATCH;
1245         }
1246
1247       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1248       ecode += GET(ecode, 1);
1249       if (*ecode != OP_ALT) break;
1250       }
1251
1252     if (matched_once || allow_zero)
1253       {
1254       ecode += 1 + LINK_SIZE;
1255       break;
1256       }
1257     RRETURN(MATCH_NOMATCH);
1258
1259     /* Control never reaches here. */
1260
1261     /* Conditional group: compilation checked that there are no more than
1262     two branches. If the condition is false, skipping the first branch takes us
1263     past the end if there is only one branch, but that's OK because that is
1264     exactly what going to the ket would do. */
1265
1266     case OP_COND:
1267     case OP_SCOND:
1268     codelink = GET(ecode, 1);
1269
1270     /* Because of the way auto-callout works during compile, a callout item is
1271     inserted between OP_COND and an assertion condition. */
1272
1273     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1274       {
1275       if (PUBL(callout) != NULL)
1276         {
1277         PUBL(callout_block) cb;
1278         cb.version          = 2;   /* Version 1 of the callout block */
1279         cb.callout_number   = ecode[LINK_SIZE+2];
1280         cb.offset_vector    = md->offset_vector;
1281 #if defined COMPILE_PCRE8
1282         cb.subject          = (PCRE_SPTR)md->start_subject;
1283 #elif defined COMPILE_PCRE16
1284         cb.subject          = (PCRE_SPTR16)md->start_subject;
1285 #elif defined COMPILE_PCRE32
1286         cb.subject          = (PCRE_SPTR32)md->start_subject;
1287 #endif
1288         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1289         cb.start_match      = (int)(mstart - md->start_subject);
1290         cb.current_position = (int)(eptr - md->start_subject);
1291         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1292         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1293         cb.capture_top      = offset_top/2;
1294         cb.capture_last     = md->capture_last;
1295         cb.callout_data     = md->callout_data;
1296         cb.mark             = md->nomatch_mark;
1297         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1298         if (rrc < 0) RRETURN(rrc);
1299         }
1300       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1301       }
1302
1303     condcode = ecode[LINK_SIZE+1];
1304
1305     /* Now see what the actual condition is */
1306
1307     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1308       {
1309       if (md->recursive == NULL)                /* Not recursing => FALSE */
1310         {
1311         condition = FALSE;
1312         ecode += GET(ecode, 1);
1313         }
1314       else
1315         {
1316         unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1317         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1318
1319         /* If the test is for recursion into a specific subpattern, and it is
1320         false, but the test was set up by name, scan the table to see if the
1321         name refers to any other numbers, and test them. The condition is true
1322         if any one is set. */
1323
1324         if (!condition && condcode == OP_NRREF)
1325           {
1326           pcre_uchar *slotA = md->name_table;
1327           for (i = 0; i < md->name_count; i++)
1328             {
1329             if (GET2(slotA, 0) == recno) break;
1330             slotA += md->name_entry_size;
1331             }
1332
1333           /* Found a name for the number - there can be only one; duplicate
1334           names for different numbers are allowed, but not vice versa. First
1335           scan down for duplicates. */
1336
1337           if (i < md->name_count)
1338             {
1339             pcre_uchar *slotB = slotA;
1340             while (slotB > md->name_table)
1341               {
1342               slotB -= md->name_entry_size;
1343               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1344                 {
1345                 condition = GET2(slotB, 0) == md->recursive->group_num;
1346                 if (condition) break;
1347                 }
1348               else break;
1349               }
1350
1351             /* Scan up for duplicates */
1352
1353             if (!condition)
1354               {
1355               slotB = slotA;
1356               for (i++; i < md->name_count; i++)
1357                 {
1358                 slotB += md->name_entry_size;
1359                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1360                   {
1361                   condition = GET2(slotB, 0) == md->recursive->group_num;
1362                   if (condition) break;
1363                   }
1364                 else break;
1365                 }
1366               }
1367             }
1368           }
1369
1370         /* Choose branch according to the condition */
1371
1372         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1373         }
1374       }
1375
1376     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1377       {
1378       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1379       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1380
1381       /* If the numbered capture is unset, but the reference was by name,
1382       scan the table to see if the name refers to any other numbers, and test
1383       them. The condition is true if any one is set. This is tediously similar
1384       to the code above, but not close enough to try to amalgamate. */
1385
1386       if (!condition && condcode == OP_NCREF)
1387         {
1388         unsigned int refno = offset >> 1;
1389         pcre_uchar *slotA = md->name_table;
1390
1391         for (i = 0; i < md->name_count; i++)
1392           {
1393           if (GET2(slotA, 0) == refno) break;
1394           slotA += md->name_entry_size;
1395           }
1396
1397         /* Found a name for the number - there can be only one; duplicate names
1398         for different numbers are allowed, but not vice versa. First scan down
1399         for duplicates. */
1400
1401         if (i < md->name_count)
1402           {
1403           pcre_uchar *slotB = slotA;
1404           while (slotB > md->name_table)
1405             {
1406             slotB -= md->name_entry_size;
1407             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1408               {
1409               offset = GET2(slotB, 0) << 1;
1410               condition = offset < offset_top &&
1411                 md->offset_vector[offset] >= 0;
1412               if (condition) break;
1413               }
1414             else break;
1415             }
1416
1417           /* Scan up for duplicates */
1418
1419           if (!condition)
1420             {
1421             slotB = slotA;
1422             for (i++; i < md->name_count; i++)
1423               {
1424               slotB += md->name_entry_size;
1425               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1426                 {
1427                 offset = GET2(slotB, 0) << 1;
1428                 condition = offset < offset_top &&
1429                   md->offset_vector[offset] >= 0;
1430                 if (condition) break;
1431                 }
1432               else break;
1433               }
1434             }
1435           }
1436         }
1437
1438       /* Choose branch according to the condition */
1439
1440       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1441       }
1442
1443     else if (condcode == OP_DEF)     /* DEFINE - always false */
1444       {
1445       condition = FALSE;
1446       ecode += GET(ecode, 1);
1447       }
1448
1449     /* The condition is an assertion. Call match() to evaluate it - setting
1450     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1451     an assertion. */
1452
1453     else
1454       {
1455       md->match_function_type = MATCH_CONDASSERT;
1456       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1457       if (rrc == MATCH_MATCH)
1458         {
1459         if (md->end_offset_top > offset_top)
1460           offset_top = md->end_offset_top;  /* Captures may have happened */
1461         condition = TRUE;
1462         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1463         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1464         }
1465
1466       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1467       assertion; it is therefore treated as NOMATCH. */
1468
1469       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1470         {
1471         RRETURN(rrc);         /* Need braces because of following else */
1472         }
1473       else
1474         {
1475         condition = FALSE;
1476         ecode += codelink;
1477         }
1478       }
1479
1480     /* We are now at the branch that is to be obeyed. As there is only one, can
1481     use tail recursion to avoid using another stack frame, except when there is
1482     unlimited repeat of a possibly empty group. In the latter case, a recursive
1483     call to match() is always required, unless the second alternative doesn't
1484     exist, in which case we can just plough on. Note that, for compatibility
1485     with Perl, the | in a conditional group is NOT treated as creating two
1486     alternatives. If a THEN is encountered in the branch, it propagates out to
1487     the enclosing alternative (unless nested in a deeper set of alternatives,
1488     of course). */
1489
1490     if (condition || *ecode == OP_ALT)
1491       {
1492       if (op != OP_SCOND)
1493         {
1494         ecode += 1 + LINK_SIZE;
1495         goto TAIL_RECURSE;
1496         }
1497
1498       md->match_function_type = MATCH_CBEGROUP;
1499       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1500       RRETURN(rrc);
1501       }
1502
1503      /* Condition false & no alternative; continue after the group. */
1504
1505     else
1506       {
1507       ecode += 1 + LINK_SIZE;
1508       }
1509     break;
1510
1511
1512     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1513     to close any currently open capturing brackets. */
1514
1515     case OP_CLOSE:
1516     number = GET2(ecode, 1);
1517     offset = number << 1;
1518
1519 #ifdef PCRE_DEBUG
1520       printf("end bracket %d at *ACCEPT", number);
1521       printf("\n");
1522 #endif
1523
1524     md->capture_last = number;
1525     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1526       {
1527       md->offset_vector[offset] =
1528         md->offset_vector[md->offset_end - number];
1529       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1530       if (offset_top <= offset) offset_top = offset + 2;
1531       }
1532     ecode += 1 + IMM2_SIZE;
1533     break;
1534
1535
1536     /* End of the pattern, either real or forced. */
1537
1538     case OP_END:
1539     case OP_ACCEPT:
1540     case OP_ASSERT_ACCEPT:
1541
1542     /* If we have matched an empty string, fail if not in an assertion and not
1543     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1544     is set and we have matched at the start of the subject. In both cases,
1545     backtracking will then try other alternatives, if any. */
1546
1547     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1548          md->recursive == NULL &&
1549          (md->notempty ||
1550            (md->notempty_atstart &&
1551              mstart == md->start_subject + md->start_offset)))
1552       RRETURN(MATCH_NOMATCH);
1553
1554     /* Otherwise, we have a match. */
1555
1556     md->end_match_ptr = eptr;           /* Record where we ended */
1557     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1558     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1559
1560     /* For some reason, the macros don't work properly if an expression is
1561     given as the argument to RRETURN when the heap is in use. */
1562
1563     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1564     RRETURN(rrc);
1565
1566     /* Assertion brackets. Check the alternative branches in turn - the
1567     matching won't pass the KET for an assertion. If any one branch matches,
1568     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1569     start of each branch to move the current point backwards, so the code at
1570     this level is identical to the lookahead case. When the assertion is part
1571     of a condition, we want to return immediately afterwards. The caller of
1572     this incarnation of the match() function will have set MATCH_CONDASSERT in
1573     md->match_function_type, and one of these opcodes will be the first opcode
1574     that is processed. We use a local variable that is preserved over calls to
1575     match() to remember this case. */
1576
1577     case OP_ASSERT:
1578     case OP_ASSERTBACK:
1579     save_mark = md->mark;
1580     if (md->match_function_type == MATCH_CONDASSERT)
1581       {
1582       condassert = TRUE;
1583       md->match_function_type = 0;
1584       }
1585     else condassert = FALSE;
1586
1587     do
1588       {
1589       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1590       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1591         {
1592         mstart = md->start_match_ptr;   /* In case \K reset it */
1593         break;
1594         }
1595       md->mark = save_mark;
1596
1597       /* A COMMIT failure must fail the entire assertion, without trying any
1598       subsequent branches. */
1599
1600       if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1601
1602       /* PCRE does not allow THEN to escape beyond an assertion; it
1603       is treated as NOMATCH. */
1604
1605       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1606       ecode += GET(ecode, 1);
1607       }
1608     while (*ecode == OP_ALT);
1609
1610     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1611
1612     /* If checking an assertion for a condition, return MATCH_MATCH. */
1613
1614     if (condassert) RRETURN(MATCH_MATCH);
1615
1616     /* Continue from after the assertion, updating the offsets high water
1617     mark, since extracts may have been taken during the assertion. */
1618
1619     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1620     ecode += 1 + LINK_SIZE;
1621     offset_top = md->end_offset_top;
1622     continue;
1623
1624     /* Negative assertion: all branches must fail to match. Encountering SKIP,
1625     PRUNE, or COMMIT means we must assume failure without checking subsequent
1626     branches. */
1627
1628     case OP_ASSERT_NOT:
1629     case OP_ASSERTBACK_NOT:
1630     save_mark = md->mark;
1631     if (md->match_function_type == MATCH_CONDASSERT)
1632       {
1633       condassert = TRUE;
1634       md->match_function_type = 0;
1635       }
1636     else condassert = FALSE;
1637
1638     do
1639       {
1640       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1641       md->mark = save_mark;
1642       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1643       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1644         {
1645         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1646         break;
1647         }
1648
1649       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1650       as NOMATCH. */
1651
1652       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1653       ecode += GET(ecode,1);
1654       }
1655     while (*ecode == OP_ALT);
1656
1657     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1658
1659     ecode += 1 + LINK_SIZE;
1660     continue;
1661
1662     /* Move the subject pointer back. This occurs only at the start of
1663     each branch of a lookbehind assertion. If we are too close to the start to
1664     move back, this match function fails. When working with UTF-8 we move
1665     back a number of characters, not bytes. */
1666
1667     case OP_REVERSE:
1668 #ifdef SUPPORT_UTF
1669     if (utf)
1670       {
1671       i = GET(ecode, 1);
1672       while (i-- > 0)
1673         {
1674         eptr--;
1675         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1676         BACKCHAR(eptr);
1677         }
1678       }
1679     else
1680 #endif
1681
1682     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1683
1684       {
1685       eptr -= GET(ecode, 1);
1686       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1687       }
1688
1689     /* Save the earliest consulted character, then skip to next op code */
1690
1691     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1692     ecode += 1 + LINK_SIZE;
1693     break;
1694
1695     /* The callout item calls an external function, if one is provided, passing
1696     details of the match so far. This is mainly for debugging, though the
1697     function is able to force a failure. */
1698
1699     case OP_CALLOUT:
1700     if (PUBL(callout) != NULL)
1701       {
1702       PUBL(callout_block) cb;
1703       cb.version          = 2;   /* Version 2 of the callout block */
1704       cb.callout_number   = ecode[1];
1705       cb.offset_vector    = md->offset_vector;
1706 #if defined COMPILE_PCRE8
1707       cb.subject          = (PCRE_SPTR)md->start_subject;
1708 #elif defined COMPILE_PCRE16
1709       cb.subject          = (PCRE_SPTR16)md->start_subject;
1710 #elif defined COMPILE_PCRE32
1711       cb.subject          = (PCRE_SPTR32)md->start_subject;
1712 #endif
1713       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1714       cb.start_match      = (int)(mstart - md->start_subject);
1715       cb.current_position = (int)(eptr - md->start_subject);
1716       cb.pattern_position = GET(ecode, 2);
1717       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1718       cb.capture_top      = offset_top/2;
1719       cb.capture_last     = md->capture_last;
1720       cb.callout_data     = md->callout_data;
1721       cb.mark             = md->nomatch_mark;
1722       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1723       if (rrc < 0) RRETURN(rrc);
1724       }
1725     ecode += 2 + 2*LINK_SIZE;
1726     break;
1727
1728     /* Recursion either matches the current regex, or some subexpression. The
1729     offset data is the offset to the starting bracket from the start of the
1730     whole pattern. (This is so that it works from duplicated subpatterns.)
1731
1732     The state of the capturing groups is preserved over recursion, and
1733     re-instated afterwards. We don't know how many are started and not yet
1734     finished (offset_top records the completed total) so we just have to save
1735     all the potential data. There may be up to 65535 such values, which is too
1736     large to put on the stack, but using malloc for small numbers seems
1737     expensive. As a compromise, the stack is used when there are no more than
1738     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1739
1740     There are also other values that have to be saved. We use a chained
1741     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1742     for the original version of this logic. It has, however, been hacked around
1743     a lot, so he is not to blame for the current way it works. */
1744
1745     case OP_RECURSE:
1746       {
1747       recursion_info *ri;
1748       unsigned int recno;
1749
1750       callpat = md->start_code + GET(ecode, 1);
1751       recno = (callpat == md->start_code)? 0 :
1752         GET2(callpat, 1 + LINK_SIZE);
1753
1754       /* Check for repeating a recursion without advancing the subject pointer.
1755       This should catch convoluted mutual recursions. (Some simple cases are
1756       caught at compile time.) */
1757
1758       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1759         if (recno == ri->group_num && eptr == ri->subject_position)
1760           RRETURN(PCRE_ERROR_RECURSELOOP);
1761
1762       /* Add to "recursing stack" */
1763
1764       new_recursive.group_num = recno;
1765       new_recursive.subject_position = eptr;
1766       new_recursive.prevrec = md->recursive;
1767       md->recursive = &new_recursive;
1768
1769       /* Where to continue from afterwards */
1770
1771       ecode += 1 + LINK_SIZE;
1772
1773       /* Now save the offset data */
1774
1775       new_recursive.saved_max = md->offset_end;
1776       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1777         new_recursive.offset_save = stacksave;
1778       else
1779         {
1780         new_recursive.offset_save =
1781           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1782         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1783         }
1784       memcpy(new_recursive.offset_save, md->offset_vector,
1785             new_recursive.saved_max * sizeof(int));
1786
1787       /* OK, now we can do the recursion. After processing each alternative,
1788       restore the offset data. If there were nested recursions, md->recursive
1789       might be changed, so reset it before looping. */
1790
1791       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1792       cbegroup = (*callpat >= OP_SBRA);
1793       do
1794         {
1795         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1796         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1797           md, eptrb, RM6);
1798         memcpy(md->offset_vector, new_recursive.offset_save,
1799             new_recursive.saved_max * sizeof(int));
1800         md->recursive = new_recursive.prevrec;
1801         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1802           {
1803           DPRINTF(("Recursion matched\n"));
1804           if (new_recursive.offset_save != stacksave)
1805             (PUBL(free))(new_recursive.offset_save);
1806
1807           /* Set where we got to in the subject, and reset the start in case
1808           it was changed by \K. This *is* propagated back out of a recursion,
1809           for Perl compatibility. */
1810
1811           eptr = md->end_match_ptr;
1812           mstart = md->start_match_ptr;
1813           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1814           }
1815
1816         /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1817         is treated as NOMATCH. */
1818
1819         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1820                  rrc != MATCH_COMMIT)
1821           {
1822           DPRINTF(("Recursion gave error %d\n", rrc));
1823           if (new_recursive.offset_save != stacksave)
1824             (PUBL(free))(new_recursive.offset_save);
1825           RRETURN(rrc);
1826           }
1827
1828         md->recursive = &new_recursive;
1829         callpat += GET(callpat, 1);
1830         }
1831       while (*callpat == OP_ALT);
1832
1833       DPRINTF(("Recursion didn't match\n"));
1834       md->recursive = new_recursive.prevrec;
1835       if (new_recursive.offset_save != stacksave)
1836         (PUBL(free))(new_recursive.offset_save);
1837       RRETURN(MATCH_NOMATCH);
1838       }
1839
1840     RECURSION_MATCHED:
1841     break;
1842
1843     /* An alternation is the end of a branch; scan along to find the end of the
1844     bracketed group and go to there. */
1845
1846     case OP_ALT:
1847     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1848     break;
1849
1850     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1851     indicating that it may occur zero times. It may repeat infinitely, or not
1852     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1853     with fixed upper repeat limits are compiled as a number of copies, with the
1854     optional ones preceded by BRAZERO or BRAMINZERO. */
1855
1856     case OP_BRAZERO:
1857     next = ecode + 1;
1858     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1859     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1860     do next += GET(next, 1); while (*next == OP_ALT);
1861     ecode = next + 1 + LINK_SIZE;
1862     break;
1863
1864     case OP_BRAMINZERO:
1865     next = ecode + 1;
1866     do next += GET(next, 1); while (*next == OP_ALT);
1867     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1868     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1869     ecode++;
1870     break;
1871
1872     case OP_SKIPZERO:
1873     next = ecode+1;
1874     do next += GET(next,1); while (*next == OP_ALT);
1875     ecode = next + 1 + LINK_SIZE;
1876     break;
1877
1878     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1879     here; just jump to the group, with allow_zero set TRUE. */
1880
1881     case OP_BRAPOSZERO:
1882     op = *(++ecode);
1883     allow_zero = TRUE;
1884     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1885       goto POSSESSIVE_NON_CAPTURE;
1886
1887     /* End of a group, repeated or non-repeating. */
1888
1889     case OP_KET:
1890     case OP_KETRMIN:
1891     case OP_KETRMAX:
1892     case OP_KETRPOS:
1893     prev = ecode - GET(ecode, 1);
1894
1895     /* If this was a group that remembered the subject start, in order to break
1896     infinite repeats of empty string matches, retrieve the subject start from
1897     the chain. Otherwise, set it NULL. */
1898
1899     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1900       {
1901       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1902       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1903       }
1904     else saved_eptr = NULL;
1905
1906     /* If we are at the end of an assertion group or a non-capturing atomic
1907     group, stop matching and return MATCH_MATCH, but record the current high
1908     water mark for use by positive assertions. We also need to record the match
1909     start in case it was changed by \K. */
1910
1911     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1912          *prev == OP_ONCE_NC)
1913       {
1914       md->end_match_ptr = eptr;      /* For ONCE_NC */
1915       md->end_offset_top = offset_top;
1916       md->start_match_ptr = mstart;
1917       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1918       }
1919
1920     /* For capturing groups we have to check the group number back at the start
1921     and if necessary complete handling an extraction by setting the offsets and
1922     bumping the high water mark. Whole-pattern recursion is coded as a recurse
1923     into group 0, so it won't be picked up here. Instead, we catch it when the
1924     OP_END is reached. Other recursion is handled here. We just have to record
1925     the current subject position and start match pointer and give a MATCH
1926     return. */
1927
1928     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1929         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1930       {
1931       number = GET2(prev, 1+LINK_SIZE);
1932       offset = number << 1;
1933
1934 #ifdef PCRE_DEBUG
1935       printf("end bracket %d", number);
1936       printf("\n");
1937 #endif
1938
1939       /* Handle a recursively called group. */
1940
1941       if (md->recursive != NULL && md->recursive->group_num == number)
1942         {
1943         md->end_match_ptr = eptr;
1944         md->start_match_ptr = mstart;
1945         RRETURN(MATCH_MATCH);
1946         }
1947
1948       /* Deal with capturing */
1949
1950       md->capture_last = number;
1951       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1952         {
1953         /* If offset is greater than offset_top, it means that we are
1954         "skipping" a capturing group, and that group's offsets must be marked
1955         unset. In earlier versions of PCRE, all the offsets were unset at the
1956         start of matching, but this doesn't work because atomic groups and
1957         assertions can cause a value to be set that should later be unset.
1958         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1959         part of the atomic group, but this is not on the final matching path,
1960         so must be unset when 2 is set. (If there is no group 2, there is no
1961         problem, because offset_top will then be 2, indicating no capture.) */
1962
1963         if (offset > offset_top)
1964           {
1965           register int *iptr = md->offset_vector + offset_top;
1966           register int *iend = md->offset_vector + offset;
1967           while (iptr < iend) *iptr++ = -1;
1968           }
1969
1970         /* Now make the extraction */
1971
1972         md->offset_vector[offset] =
1973           md->offset_vector[md->offset_end - number];
1974         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1975         if (offset_top <= offset) offset_top = offset + 2;
1976         }
1977       }
1978
1979     /* For an ordinary non-repeating ket, just continue at this level. This
1980     also happens for a repeating ket if no characters were matched in the
1981     group. This is the forcible breaking of infinite loops as implemented in
1982     Perl 5.005. For a non-repeating atomic group that includes captures,
1983     establish a backup point by processing the rest of the pattern at a lower
1984     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1985     original OP_ONCE level, thereby bypassing intermediate backup points, but
1986     resetting any captures that happened along the way. */
1987
1988     if (*ecode == OP_KET || eptr == saved_eptr)
1989       {
1990       if (*prev == OP_ONCE)
1991         {
1992         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1993         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1994         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1995         RRETURN(MATCH_ONCE);
1996         }
1997       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1998       break;
1999       }
2000
2001     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2002     and return MATCH_KETRPOS. This makes it possible to do the repeats one
2003     at a time from the outer level, thus saving stack. */
2004
2005     if (*ecode == OP_KETRPOS)
2006       {
2007       md->end_match_ptr = eptr;
2008       md->end_offset_top = offset_top;
2009       RRETURN(MATCH_KETRPOS);
2010       }
2011
2012     /* The normal repeating kets try the rest of the pattern or restart from
2013     the preceding bracket, in the appropriate order. In the second case, we can
2014     use tail recursion to avoid using another stack frame, unless we have an
2015     atomic group or an unlimited repeat of a group that can match an empty
2016     string. */
2017
2018     if (*ecode == OP_KETRMIN)
2019       {
2020       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2021       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2022       if (*prev == OP_ONCE)
2023         {
2024         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2025         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2026         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2027         RRETURN(MATCH_ONCE);
2028         }
2029       if (*prev >= OP_SBRA)    /* Could match an empty string */
2030         {
2031         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2032         RRETURN(rrc);
2033         }
2034       ecode = prev;
2035       goto TAIL_RECURSE;
2036       }
2037     else  /* OP_KETRMAX */
2038       {
2039       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2040       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2041       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2042       if (*prev == OP_ONCE)
2043         {
2044         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2045         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2046         md->once_target = prev;
2047         RRETURN(MATCH_ONCE);
2048         }
2049       ecode += 1 + LINK_SIZE;
2050       goto TAIL_RECURSE;
2051       }
2052     /* Control never gets here */
2053
2054     /* Not multiline mode: start of subject assertion, unless notbol. */
2055
2056     case OP_CIRC:
2057     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2058
2059     /* Start of subject assertion */
2060
2061     case OP_SOD:
2062     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2063     ecode++;
2064     break;
2065
2066     /* Multiline mode: start of subject unless notbol, or after any newline. */
2067
2068     case OP_CIRCM:
2069     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2070     if (eptr != md->start_subject &&
2071         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2072       RRETURN(MATCH_NOMATCH);
2073     ecode++;
2074     break;
2075
2076     /* Start of match assertion */
2077
2078     case OP_SOM:
2079     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2080     ecode++;
2081     break;
2082
2083     /* Reset the start of match point */
2084
2085     case OP_SET_SOM:
2086     mstart = eptr;
2087     ecode++;
2088     break;
2089
2090     /* Multiline mode: assert before any newline, or before end of subject
2091     unless noteol is set. */
2092
2093     case OP_DOLLM:
2094     if (eptr < md->end_subject)
2095       {
2096       if (!IS_NEWLINE(eptr))
2097         {
2098         if (md->partial != 0 &&
2099             eptr + 1 >= md->end_subject &&
2100             NLBLOCK->nltype == NLTYPE_FIXED &&
2101             NLBLOCK->nllen == 2 &&
2102             RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2103           {
2104           md->hitend = TRUE;
2105           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2106           }
2107         RRETURN(MATCH_NOMATCH);
2108         }
2109       }
2110     else
2111       {
2112       if (md->noteol) RRETURN(MATCH_NOMATCH);
2113       SCHECK_PARTIAL();
2114       }
2115     ecode++;
2116     break;
2117
2118     /* Not multiline mode: assert before a terminating newline or before end of
2119     subject unless noteol is set. */
2120
2121     case OP_DOLL:
2122     if (md->noteol) RRETURN(MATCH_NOMATCH);
2123     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2124
2125     /* ... else fall through for endonly */
2126
2127     /* End of subject assertion (\z) */
2128
2129     case OP_EOD:
2130     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2131     SCHECK_PARTIAL();
2132     ecode++;
2133     break;
2134
2135     /* End of subject or ending \n assertion (\Z) */
2136
2137     case OP_EODN:
2138     ASSERT_NL_OR_EOS:
2139     if (eptr < md->end_subject &&
2140         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2141       {
2142       if (md->partial != 0 &&
2143           eptr + 1 >= md->end_subject &&
2144           NLBLOCK->nltype == NLTYPE_FIXED &&
2145           NLBLOCK->nllen == 2 &&
2146           RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2147         {
2148         md->hitend = TRUE;
2149         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2150         }
2151       RRETURN(MATCH_NOMATCH);
2152       }
2153
2154     /* Either at end of string or \n before end. */
2155
2156     SCHECK_PARTIAL();
2157     ecode++;
2158     break;
2159
2160     /* Word boundary assertions */
2161
2162     case OP_NOT_WORD_BOUNDARY:
2163     case OP_WORD_BOUNDARY:
2164       {
2165
2166       /* Find out if the previous and current characters are "word" characters.
2167       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2168       be "non-word" characters. Remember the earliest consulted character for
2169       partial matching. */
2170
2171 #ifdef SUPPORT_UTF
2172       if (utf)
2173         {
2174         /* Get status of previous character */
2175
2176         if (eptr == md->start_subject) prev_is_word = FALSE; else
2177           {
2178           PCRE_PUCHAR lastptr = eptr - 1;
2179           BACKCHAR(lastptr);
2180           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2181           GETCHAR(c, lastptr);
2182 #ifdef SUPPORT_UCP
2183           if (md->use_ucp)
2184             {
2185             if (c == '_') prev_is_word = TRUE; else
2186               {
2187               int cat = UCD_CATEGORY(c);
2188               prev_is_word = (cat == ucp_L || cat == ucp_N);
2189               }
2190             }
2191           else
2192 #endif
2193           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2194           }
2195
2196         /* Get status of next character */
2197
2198         if (eptr >= md->end_subject)
2199           {
2200           SCHECK_PARTIAL();
2201           cur_is_word = FALSE;
2202           }
2203         else
2204           {
2205           GETCHAR(c, eptr);
2206 #ifdef SUPPORT_UCP
2207           if (md->use_ucp)
2208             {
2209             if (c == '_') cur_is_word = TRUE; else
2210               {
2211               int cat = UCD_CATEGORY(c);
2212               cur_is_word = (cat == ucp_L || cat == ucp_N);
2213               }
2214             }
2215           else
2216 #endif
2217           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2218           }
2219         }
2220       else
2221 #endif
2222
2223       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2224       consistency with the behaviour of \w we do use it in this case. */
2225
2226         {
2227         /* Get status of previous character */
2228
2229         if (eptr == md->start_subject) prev_is_word = FALSE; else
2230           {
2231           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2232 #ifdef SUPPORT_UCP
2233           if (md->use_ucp)
2234             {
2235             c = eptr[-1];
2236             if (c == '_') prev_is_word = TRUE; else
2237               {
2238               int cat = UCD_CATEGORY(c);
2239               prev_is_word = (cat == ucp_L || cat == ucp_N);
2240               }
2241             }
2242           else
2243 #endif
2244           prev_is_word = MAX_255(eptr[-1])
2245             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2246           }
2247
2248         /* Get status of next character */
2249
2250         if (eptr >= md->end_subject)
2251           {
2252           SCHECK_PARTIAL();
2253           cur_is_word = FALSE;
2254           }
2255         else
2256 #ifdef SUPPORT_UCP
2257         if (md->use_ucp)
2258           {
2259           c = *eptr;
2260           if (c == '_') cur_is_word = TRUE; else
2261             {
2262             int cat = UCD_CATEGORY(c);
2263             cur_is_word = (cat == ucp_L || cat == ucp_N);
2264             }
2265           }
2266         else
2267 #endif
2268         cur_is_word = MAX_255(*eptr)
2269           && ((md->ctypes[*eptr] & ctype_word) != 0);
2270         }
2271
2272       /* Now see if the situation is what we want */
2273
2274       if ((*ecode++ == OP_WORD_BOUNDARY)?
2275            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2276         RRETURN(MATCH_NOMATCH);
2277       }
2278     break;
2279
2280     /* Match any single character type except newline; have to take care with
2281     CRLF newlines and partial matching. */
2282
2283     case OP_ANY:
2284     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2285     if (md->partial != 0 &&
2286         eptr + 1 >= md->end_subject &&
2287         NLBLOCK->nltype == NLTYPE_FIXED &&
2288         NLBLOCK->nllen == 2 &&
2289         RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
2290       {
2291       md->hitend = TRUE;
2292       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2293       }
2294
2295     /* Fall through */
2296
2297     /* Match any single character whatsoever. */
2298
2299     case OP_ALLANY:
2300     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2301       {                            /* not be updated before SCHECK_PARTIAL. */
2302       SCHECK_PARTIAL();
2303       RRETURN(MATCH_NOMATCH);
2304       }
2305     eptr++;
2306 #ifdef SUPPORT_UTF
2307     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2308 #endif
2309     ecode++;
2310     break;
2311
2312     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2313     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2314
2315     case OP_ANYBYTE:
2316     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2317       {                            /* not be updated before SCHECK_PARTIAL. */
2318       SCHECK_PARTIAL();
2319       RRETURN(MATCH_NOMATCH);
2320       }
2321     eptr++;
2322     ecode++;
2323     break;
2324
2325     case OP_NOT_DIGIT:
2326     if (eptr >= md->end_subject)
2327       {
2328       SCHECK_PARTIAL();
2329       RRETURN(MATCH_NOMATCH);
2330       }
2331     GETCHARINCTEST(c, eptr);
2332     if (
2333 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2334        c < 256 &&
2335 #endif
2336        (md->ctypes[c] & ctype_digit) != 0
2337        )
2338       RRETURN(MATCH_NOMATCH);
2339     ecode++;
2340     break;
2341
2342     case OP_DIGIT:
2343     if (eptr >= md->end_subject)
2344       {
2345       SCHECK_PARTIAL();
2346       RRETURN(MATCH_NOMATCH);
2347       }
2348     GETCHARINCTEST(c, eptr);
2349     if (
2350 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2351        c > 255 ||
2352 #endif
2353        (md->ctypes[c] & ctype_digit) == 0
2354        )
2355       RRETURN(MATCH_NOMATCH);
2356     ecode++;
2357     break;
2358
2359     case OP_NOT_WHITESPACE:
2360     if (eptr >= md->end_subject)
2361       {
2362       SCHECK_PARTIAL();
2363       RRETURN(MATCH_NOMATCH);
2364       }
2365     GETCHARINCTEST(c, eptr);
2366     if (
2367 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2368        c < 256 &&
2369 #endif
2370        (md->ctypes[c] & ctype_space) != 0
2371        )
2372       RRETURN(MATCH_NOMATCH);
2373     ecode++;
2374     break;
2375
2376     case OP_WHITESPACE:
2377     if (eptr >= md->end_subject)
2378       {
2379       SCHECK_PARTIAL();
2380       RRETURN(MATCH_NOMATCH);
2381       }
2382     GETCHARINCTEST(c, eptr);
2383     if (
2384 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2385        c > 255 ||
2386 #endif
2387        (md->ctypes[c] & ctype_space) == 0
2388        )
2389       RRETURN(MATCH_NOMATCH);
2390     ecode++;
2391     break;
2392
2393     case OP_NOT_WORDCHAR:
2394     if (eptr >= md->end_subject)
2395       {
2396       SCHECK_PARTIAL();
2397       RRETURN(MATCH_NOMATCH);
2398       }
2399     GETCHARINCTEST(c, eptr);
2400     if (
2401 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2402        c < 256 &&
2403 #endif
2404        (md->ctypes[c] & ctype_word) != 0
2405        )
2406       RRETURN(MATCH_NOMATCH);
2407     ecode++;
2408     break;
2409
2410     case OP_WORDCHAR:
2411     if (eptr >= md->end_subject)
2412       {
2413       SCHECK_PARTIAL();
2414       RRETURN(MATCH_NOMATCH);
2415       }
2416     GETCHARINCTEST(c, eptr);
2417     if (
2418 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2419        c > 255 ||
2420 #endif
2421        (md->ctypes[c] & ctype_word) == 0
2422        )
2423       RRETURN(MATCH_NOMATCH);
2424     ecode++;
2425     break;
2426
2427     case OP_ANYNL:
2428     if (eptr >= md->end_subject)
2429       {
2430       SCHECK_PARTIAL();
2431       RRETURN(MATCH_NOMATCH);
2432       }
2433     GETCHARINCTEST(c, eptr);
2434     switch(c)
2435       {
2436       default: RRETURN(MATCH_NOMATCH);
2437
2438       case CHAR_CR:
2439       if (eptr >= md->end_subject)
2440         {
2441         SCHECK_PARTIAL();
2442         }
2443       else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
2444       break;
2445
2446       case CHAR_LF:
2447       break;
2448
2449       case CHAR_VT:
2450       case CHAR_FF:
2451       case CHAR_NEL:
2452 #ifndef EBCDIC
2453       case 0x2028:
2454       case 0x2029:
2455 #endif  /* Not EBCDIC */
2456       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2457       break;
2458       }
2459     ecode++;
2460     break;
2461
2462     case OP_NOT_HSPACE:
2463     if (eptr >= md->end_subject)
2464       {
2465       SCHECK_PARTIAL();
2466       RRETURN(MATCH_NOMATCH);
2467       }
2468     GETCHARINCTEST(c, eptr);
2469     switch(c)
2470       {
2471       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2472       default: break;
2473       }
2474     ecode++;
2475     break;
2476
2477     case OP_HSPACE:
2478     if (eptr >= md->end_subject)
2479       {
2480       SCHECK_PARTIAL();
2481       RRETURN(MATCH_NOMATCH);
2482       }
2483     GETCHARINCTEST(c, eptr);
2484     switch(c)
2485       {
2486       HSPACE_CASES: break;  /* Byte and multibyte cases */
2487       default: RRETURN(MATCH_NOMATCH);
2488       }
2489     ecode++;
2490     break;
2491
2492     case OP_NOT_VSPACE:
2493     if (eptr >= md->end_subject)
2494       {
2495       SCHECK_PARTIAL();
2496       RRETURN(MATCH_NOMATCH);
2497       }
2498     GETCHARINCTEST(c, eptr);
2499     switch(c)
2500       {
2501       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2502       default: break;
2503       }
2504     ecode++;
2505     break;
2506
2507     case OP_VSPACE:
2508     if (eptr >= md->end_subject)
2509       {
2510       SCHECK_PARTIAL();
2511       RRETURN(MATCH_NOMATCH);
2512       }
2513     GETCHARINCTEST(c, eptr);
2514     switch(c)
2515       {
2516       VSPACE_CASES: break;
2517       default: RRETURN(MATCH_NOMATCH);
2518       }
2519     ecode++;
2520     break;
2521
2522 #ifdef SUPPORT_UCP
2523     /* Check the next character by Unicode property. We will get here only
2524     if the support is in the binary; otherwise a compile-time error occurs. */
2525
2526     case OP_PROP:
2527     case OP_NOTPROP:
2528     if (eptr >= md->end_subject)
2529       {
2530       SCHECK_PARTIAL();
2531       RRETURN(MATCH_NOMATCH);
2532       }
2533     GETCHARINCTEST(c, eptr);
2534       {
2535       const pcre_uint32 *cp;
2536       const ucd_record *prop = GET_UCD(c);
2537
2538       switch(ecode[1])
2539         {
2540         case PT_ANY:
2541         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2542         break;
2543
2544         case PT_LAMP:
2545         if ((prop->chartype == ucp_Lu ||
2546              prop->chartype == ucp_Ll ||
2547              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2548           RRETURN(MATCH_NOMATCH);
2549         break;
2550
2551         case PT_GC:
2552         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2553           RRETURN(MATCH_NOMATCH);
2554         break;
2555
2556         case PT_PC:
2557         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2558           RRETURN(MATCH_NOMATCH);
2559         break;
2560
2561         case PT_SC:
2562         if ((ecode[2] != prop->script) == (op == OP_PROP))
2563           RRETURN(MATCH_NOMATCH);
2564         break;
2565
2566         /* These are specials */
2567
2568         case PT_ALNUM:
2569         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2570              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2571           RRETURN(MATCH_NOMATCH);
2572         break;
2573
2574         case PT_SPACE:    /* Perl space */
2575         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2576              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2577                == (op == OP_NOTPROP))
2578           RRETURN(MATCH_NOMATCH);
2579         break;
2580
2581         case PT_PXSPACE:  /* POSIX space */
2582         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2583              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2584              c == CHAR_FF || c == CHAR_CR)
2585                == (op == OP_NOTPROP))
2586           RRETURN(MATCH_NOMATCH);
2587         break;
2588
2589         case PT_WORD:
2590         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2591              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2592              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2593           RRETURN(MATCH_NOMATCH);
2594         break;
2595
2596         case PT_CLIST:
2597         cp = PRIV(ucd_caseless_sets) + ecode[2];
2598         for (;;)
2599           {
2600           if (c < *cp)
2601             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2602           if (c == *cp++)
2603             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2604           }
2605         break;
2606
2607         /* This should never occur */
2608
2609         default:
2610         RRETURN(PCRE_ERROR_INTERNAL);
2611         }
2612
2613       ecode += 3;
2614       }
2615     break;
2616
2617     /* Match an extended Unicode sequence. We will get here only if the support
2618     is in the binary; otherwise a compile-time error occurs. */
2619
2620     case OP_EXTUNI:
2621     if (eptr >= md->end_subject)
2622       {
2623       SCHECK_PARTIAL();
2624       RRETURN(MATCH_NOMATCH);
2625       }
2626     else
2627       {
2628       int lgb, rgb;
2629       GETCHARINCTEST(c, eptr);
2630       lgb = UCD_GRAPHBREAK(c);
2631       while (eptr < md->end_subject)
2632         {
2633         int len = 1;
2634         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2635         rgb = UCD_GRAPHBREAK(c);
2636         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2637         lgb = rgb;
2638         eptr += len;
2639         }
2640       }
2641     CHECK_PARTIAL();
2642     ecode++;
2643     break;
2644 #endif  /* SUPPORT_UCP */
2645
2646
2647     /* Match a back reference, possibly repeatedly. Look past the end of the
2648     item to see if there is repeat information following. The code is similar
2649     to that for character classes, but repeated for efficiency. Then obey
2650     similar code to character type repeats - written out again for speed.
2651     However, if the referenced string is the empty string, always treat
2652     it as matched, any number of times (otherwise there could be infinite
2653     loops). */
2654
2655     case OP_REF:
2656     case OP_REFI:
2657     caseless = op == OP_REFI;
2658     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2659     ecode += 1 + IMM2_SIZE;
2660
2661     /* If the reference is unset, there are two possibilities:
2662
2663     (a) In the default, Perl-compatible state, set the length negative;
2664     this ensures that every attempt at a match fails. We can't just fail
2665     here, because of the possibility of quantifiers with zero minima.
2666
2667     (b) If the JavaScript compatibility flag is set, set the length to zero
2668     so that the back reference matches an empty string.
2669
2670     Otherwise, set the length to the length of what was matched by the
2671     referenced subpattern. */
2672
2673     if (offset >= offset_top || md->offset_vector[offset] < 0)
2674       length = (md->jscript_compat)? 0 : -1;
2675     else
2676       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2677
2678     /* Set up for repetition, or handle the non-repeated case */
2679
2680     switch (*ecode)
2681       {
2682       case OP_CRSTAR:
2683       case OP_CRMINSTAR:
2684       case OP_CRPLUS:
2685       case OP_CRMINPLUS:
2686       case OP_CRQUERY:
2687       case OP_CRMINQUERY:
2688       c = *ecode++ - OP_CRSTAR;
2689       minimize = (c & 1) != 0;
2690       min = rep_min[c];                 /* Pick up values from tables; */
2691       max = rep_max[c];                 /* zero for max => infinity */
2692       if (max == 0) max = INT_MAX;
2693       break;
2694
2695       case OP_CRRANGE:
2696       case OP_CRMINRANGE:
2697       minimize = (*ecode == OP_CRMINRANGE);
2698       min = GET2(ecode, 1);
2699       max = GET2(ecode, 1 + IMM2_SIZE);
2700       if (max == 0) max = INT_MAX;
2701       ecode += 1 + 2 * IMM2_SIZE;
2702       break;
2703
2704       default:               /* No repeat follows */
2705       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2706         {
2707         if (length == -2) eptr = md->end_subject;   /* Partial match */
2708         CHECK_PARTIAL();
2709         RRETURN(MATCH_NOMATCH);
2710         }
2711       eptr += length;
2712       continue;              /* With the main loop */
2713       }
2714
2715     /* Handle repeated back references. If the length of the reference is
2716     zero, just continue with the main loop. If the length is negative, it
2717     means the reference is unset in non-JavaScript-compatible mode. If the
2718     minimum is zero, we can continue at the same level without recursion. For
2719     any other minimum, carrying on will result in NOMATCH. */
2720
2721     if (length == 0) continue;
2722     if (length < 0 && min == 0) continue;
2723
2724     /* First, ensure the minimum number of matches are present. We get back
2725     the length of the reference string explicitly rather than passing the
2726     address of eptr, so that eptr can be a register variable. */
2727
2728     for (i = 1; i <= min; i++)
2729       {
2730       int slength;
2731       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2732         {
2733         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2734         CHECK_PARTIAL();
2735         RRETURN(MATCH_NOMATCH);
2736         }
2737       eptr += slength;
2738       }
2739
2740     /* If min = max, continue at the same level without recursion.
2741     They are not both allowed to be zero. */
2742
2743     if (min == max) continue;
2744
2745     /* If minimizing, keep trying and advancing the pointer */
2746
2747     if (minimize)
2748       {
2749       for (fi = min;; fi++)
2750         {
2751         int slength;
2752         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2753         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2754         if (fi >= max) RRETURN(MATCH_NOMATCH);
2755         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2756           {
2757           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2758           CHECK_PARTIAL();
2759           RRETURN(MATCH_NOMATCH);
2760           }
2761         eptr += slength;
2762         }
2763       /* Control never gets here */
2764       }
2765
2766     /* If maximizing, find the longest string and work backwards */
2767
2768     else
2769       {
2770       pp = eptr;
2771       for (i = min; i < max; i++)
2772         {
2773         int slength;
2774         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2775           {
2776           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2777           the soft partial matching case. */
2778
2779           if (slength == -2 && md->partial != 0 &&
2780               md->end_subject > md->start_used_ptr)
2781             {
2782             md->hitend = TRUE;
2783             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2784             }
2785           break;
2786           }
2787         eptr += slength;
2788         }
2789
2790       while (eptr >= pp)
2791         {
2792         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2793         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2794         eptr -= length;
2795         }
2796       RRETURN(MATCH_NOMATCH);
2797       }
2798     /* Control never gets here */
2799
2800     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2801     used when all the characters in the class have values in the range 0-255,
2802     and either the matching is caseful, or the characters are in the range
2803     0-127 when UTF-8 processing is enabled. The only difference between
2804     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2805     encountered.
2806
2807     First, look past the end of the item to see if there is repeat information
2808     following. Then obey similar code to character type repeats - written out
2809     again for speed. */
2810
2811     case OP_NCLASS:
2812     case OP_CLASS:
2813       {
2814       /* The data variable is saved across frames, so the byte map needs to
2815       be stored there. */
2816 #define BYTE_MAP ((pcre_uint8 *)data)
2817       data = ecode + 1;                /* Save for matching */
2818       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2819
2820       switch (*ecode)
2821         {
2822         case OP_CRSTAR:
2823         case OP_CRMINSTAR:
2824         case OP_CRPLUS:
2825         case OP_CRMINPLUS:
2826         case OP_CRQUERY:
2827         case OP_CRMINQUERY:
2828         c = *ecode++ - OP_CRSTAR;
2829         minimize = (c & 1) != 0;
2830         min = rep_min[c];                 /* Pick up values from tables; */
2831         max = rep_max[c];                 /* zero for max => infinity */
2832         if (max == 0) max = INT_MAX;
2833         break;
2834
2835         case OP_CRRANGE:
2836         case OP_CRMINRANGE:
2837         minimize = (*ecode == OP_CRMINRANGE);
2838         min = GET2(ecode, 1);
2839         max = GET2(ecode, 1 + IMM2_SIZE);
2840         if (max == 0) max = INT_MAX;
2841         ecode += 1 + 2 * IMM2_SIZE;
2842         break;
2843
2844         default:               /* No repeat follows */
2845         min = max = 1;
2846         break;
2847         }
2848
2849       /* First, ensure the minimum number of matches are present. */
2850
2851 #ifdef SUPPORT_UTF
2852       if (utf)
2853         {
2854         for (i = 1; i <= min; i++)
2855           {
2856           if (eptr >= md->end_subject)
2857             {
2858             SCHECK_PARTIAL();
2859             RRETURN(MATCH_NOMATCH);
2860             }
2861           GETCHARINC(c, eptr);
2862           if (c > 255)
2863             {
2864             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2865             }
2866           else
2867             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2868           }
2869         }
2870       else
2871 #endif
2872       /* Not UTF mode */
2873         {
2874         for (i = 1; i <= min; i++)
2875           {
2876           if (eptr >= md->end_subject)
2877             {
2878             SCHECK_PARTIAL();
2879             RRETURN(MATCH_NOMATCH);
2880             }
2881           c = *eptr++;
2882 #ifndef COMPILE_PCRE8
2883           if (c > 255)
2884             {
2885             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2886             }
2887           else
2888 #endif
2889             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2890           }
2891         }
2892
2893       /* If max == min we can continue with the main loop without the
2894       need to recurse. */
2895
2896       if (min == max) continue;
2897
2898       /* If minimizing, keep testing the rest of the expression and advancing
2899       the pointer while it matches the class. */
2900
2901       if (minimize)
2902         {
2903 #ifdef SUPPORT_UTF
2904         if (utf)
2905           {
2906           for (fi = min;; fi++)
2907             {
2908             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2909             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910             if (fi >= max) RRETURN(MATCH_NOMATCH);
2911             if (eptr >= md->end_subject)
2912               {
2913               SCHECK_PARTIAL();
2914               RRETURN(MATCH_NOMATCH);
2915               }
2916             GETCHARINC(c, eptr);
2917             if (c > 255)
2918               {
2919               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2920               }
2921             else
2922               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2923             }
2924           }
2925         else
2926 #endif
2927         /* Not UTF mode */
2928           {
2929           for (fi = min;; fi++)
2930             {
2931             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2932             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2933             if (fi >= max) RRETURN(MATCH_NOMATCH);
2934             if (eptr >= md->end_subject)
2935               {
2936               SCHECK_PARTIAL();
2937               RRETURN(MATCH_NOMATCH);
2938               }
2939             c = *eptr++;
2940 #ifndef COMPILE_PCRE8
2941             if (c > 255)
2942               {
2943               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2944               }
2945             else
2946 #endif
2947               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2948             }
2949           }
2950         /* Control never gets here */
2951         }
2952
2953       /* If maximizing, find the longest possible run, then work backwards. */
2954
2955       else
2956         {
2957         pp = eptr;
2958
2959 #ifdef SUPPORT_UTF
2960         if (utf)
2961           {
2962           for (i = min; i < max; i++)
2963             {
2964             int len = 1;
2965             if (eptr >= md->end_subject)
2966               {
2967               SCHECK_PARTIAL();
2968               break;
2969               }
2970             GETCHARLEN(c, eptr, len);
2971             if (c > 255)
2972               {
2973               if (op == OP_CLASS) break;
2974               }
2975             else
2976               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2977             eptr += len;
2978             }
2979           for (;;)
2980             {
2981             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2982             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2983             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2984             BACKCHAR(eptr);
2985             }
2986           }
2987         else
2988 #endif
2989           /* Not UTF mode */
2990           {
2991           for (i = min; i < max; i++)
2992             {
2993             if (eptr >= md->end_subject)
2994               {
2995               SCHECK_PARTIAL();
2996               break;
2997               }
2998             c = *eptr;
2999 #ifndef COMPILE_PCRE8
3000             if (c > 255)
3001               {
3002               if (op == OP_CLASS) break;
3003               }
3004             else
3005 #endif
3006               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3007             eptr++;
3008             }
3009           while (eptr >= pp)
3010             {
3011             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3012             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3013             eptr--;
3014             }
3015           }
3016
3017         RRETURN(MATCH_NOMATCH);
3018         }
3019 #undef BYTE_MAP
3020       }
3021     /* Control never gets here */
3022
3023
3024     /* Match an extended character class. This opcode is encountered only
3025     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3026     mode, because Unicode properties are supported in non-UTF-8 mode. */
3027
3028 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3029     case OP_XCLASS:
3030       {
3031       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3032       ecode += GET(ecode, 1);                      /* Advance past the item */
3033
3034       switch (*ecode)
3035         {
3036         case OP_CRSTAR:
3037         case OP_CRMINSTAR:
3038         case OP_CRPLUS:
3039         case OP_CRMINPLUS:
3040         case OP_CRQUERY:
3041         case OP_CRMINQUERY:
3042         c = *ecode++ - OP_CRSTAR;
3043         minimize = (c & 1) != 0;
3044         min = rep_min[c];                 /* Pick up values from tables; */
3045         max = rep_max[c];                 /* zero for max => infinity */
3046         if (max == 0) max = INT_MAX;
3047         break;
3048
3049         case OP_CRRANGE:
3050         case OP_CRMINRANGE:
3051         minimize = (*ecode == OP_CRMINRANGE);
3052         min = GET2(ecode, 1);
3053         max = GET2(ecode, 1 + IMM2_SIZE);
3054         if (max == 0) max = INT_MAX;
3055         ecode += 1 + 2 * IMM2_SIZE;
3056         break;
3057
3058         default:               /* No repeat follows */
3059         min = max = 1;
3060         break;
3061         }
3062
3063       /* First, ensure the minimum number of matches are present. */
3064
3065       for (i = 1; i <= min; i++)
3066         {
3067         if (eptr >= md->end_subject)
3068           {
3069           SCHECK_PARTIAL();
3070           RRETURN(MATCH_NOMATCH);
3071           }
3072         GETCHARINCTEST(c, eptr);
3073         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3074         }
3075
3076       /* If max == min we can continue with the main loop without the
3077       need to recurse. */
3078
3079       if (min == max) continue;
3080
3081       /* If minimizing, keep testing the rest of the expression and advancing
3082       the pointer while it matches the class. */
3083
3084       if (minimize)
3085         {
3086         for (fi = min;; fi++)
3087           {
3088           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3089           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3090           if (fi >= max) RRETURN(MATCH_NOMATCH);
3091           if (eptr >= md->end_subject)
3092             {
3093             SCHECK_PARTIAL();
3094             RRETURN(MATCH_NOMATCH);
3095             }
3096           GETCHARINCTEST(c, eptr);
3097           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3098           }
3099         /* Control never gets here */
3100         }
3101
3102       /* If maximizing, find the longest possible run, then work backwards. */
3103
3104       else
3105         {
3106         pp = eptr;
3107         for (i = min; i < max; i++)
3108           {
3109           int len = 1;
3110           if (eptr >= md->end_subject)
3111             {
3112             SCHECK_PARTIAL();
3113             break;
3114             }
3115 #ifdef SUPPORT_UTF
3116           GETCHARLENTEST(c, eptr, len);
3117 #else
3118           c = *eptr;
3119 #endif
3120           if (!PRIV(xclass)(c, data, utf)) break;
3121           eptr += len;
3122           }
3123         for(;;)
3124           {
3125           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3126           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3127           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3128 #ifdef SUPPORT_UTF
3129           if (utf) BACKCHAR(eptr);
3130 #endif
3131           }
3132         RRETURN(MATCH_NOMATCH);
3133         }
3134
3135       /* Control never gets here */
3136       }
3137 #endif    /* End of XCLASS */
3138
3139     /* Match a single character, casefully */
3140
3141     case OP_CHAR:
3142 #ifdef SUPPORT_UTF
3143     if (utf)
3144       {
3145       length = 1;
3146       ecode++;
3147       GETCHARLEN(fc, ecode, length);
3148       if (length > md->end_subject - eptr)
3149         {
3150         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3151         RRETURN(MATCH_NOMATCH);
3152         }
3153       while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
3154       }
3155     else
3156 #endif
3157     /* Not UTF mode */
3158       {
3159       if (md->end_subject - eptr < 1)
3160         {
3161         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3162         RRETURN(MATCH_NOMATCH);
3163         }
3164       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3165       ecode += 2;
3166       }
3167     break;
3168
3169     /* Match a single character, caselessly. If we are at the end of the
3170     subject, give up immediately. */
3171
3172     case OP_CHARI:
3173     if (eptr >= md->end_subject)
3174       {
3175       SCHECK_PARTIAL();
3176       RRETURN(MATCH_NOMATCH);
3177       }
3178
3179 #ifdef SUPPORT_UTF
3180     if (utf)
3181       {
3182       length = 1;
3183       ecode++;
3184       GETCHARLEN(fc, ecode, length);
3185
3186       /* If the pattern character's value is < 128, we have only one byte, and
3187       we know that its other case must also be one byte long, so we can use the
3188       fast lookup table. We know that there is at least one byte left in the
3189       subject. */
3190
3191       if (fc < 128)
3192         {
3193         pcre_uchar cc = RAWUCHAR(eptr);
3194         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3195         ecode++;
3196         eptr++;
3197         }
3198
3199       /* Otherwise we must pick up the subject character. Note that we cannot
3200       use the value of "length" to check for sufficient bytes left, because the
3201       other case of the character may have more or fewer bytes.  */
3202
3203       else
3204         {
3205         pcre_uint32 dc;
3206         GETCHARINC(dc, eptr);
3207         ecode += length;
3208
3209         /* If we have Unicode property support, we can use it to test the other
3210         case of the character, if there is one. */
3211
3212         if (fc != dc)
3213           {
3214 #ifdef SUPPORT_UCP
3215           if (dc != UCD_OTHERCASE(fc))
3216 #endif
3217             RRETURN(MATCH_NOMATCH);
3218           }
3219         }
3220       }
3221     else
3222 #endif   /* SUPPORT_UTF */
3223
3224     /* Not UTF mode */
3225       {
3226       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3227           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3228       eptr++;
3229       ecode += 2;
3230       }
3231     break;
3232
3233     /* Match a single character repeatedly. */
3234
3235     case OP_EXACT:
3236     case OP_EXACTI:
3237     min = max = GET2(ecode, 1);
3238     ecode += 1 + IMM2_SIZE;
3239     goto REPEATCHAR;
3240
3241     case OP_POSUPTO:
3242     case OP_POSUPTOI:
3243     possessive = TRUE;
3244     /* Fall through */
3245
3246     case OP_UPTO:
3247     case OP_UPTOI:
3248     case OP_MINUPTO:
3249     case OP_MINUPTOI:
3250     min = 0;
3251     max = GET2(ecode, 1);
3252     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3253     ecode += 1 + IMM2_SIZE;
3254     goto REPEATCHAR;
3255
3256     case OP_POSSTAR:
3257     case OP_POSSTARI:
3258     possessive = TRUE;
3259     min = 0;
3260     max = INT_MAX;
3261     ecode++;
3262     goto REPEATCHAR;
3263
3264     case OP_POSPLUS:
3265     case OP_POSPLUSI:
3266     possessive = TRUE;
3267     min = 1;
3268     max = INT_MAX;
3269     ecode++;
3270     goto REPEATCHAR;
3271
3272     case OP_POSQUERY:
3273     case OP_POSQUERYI:
3274     possessive = TRUE;
3275     min = 0;
3276     max = 1;
3277     ecode++;
3278     goto REPEATCHAR;
3279
3280     case OP_STAR:
3281     case OP_STARI:
3282     case OP_MINSTAR:
3283     case OP_MINSTARI:
3284     case OP_PLUS:
3285     case OP_PLUSI:
3286     case OP_MINPLUS:
3287     case OP_MINPLUSI:
3288     case OP_QUERY:
3289     case OP_QUERYI:
3290     case OP_MINQUERY:
3291     case OP_MINQUERYI:
3292     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3293     minimize = (c & 1) != 0;
3294     min = rep_min[c];                 /* Pick up values from tables; */
3295     max = rep_max[c];                 /* zero for max => infinity */
3296     if (max == 0) max = INT_MAX;
3297
3298     /* Common code for all repeated single-character matches. */
3299
3300     REPEATCHAR:
3301 #ifdef SUPPORT_UTF
3302     if (utf)
3303       {
3304       length = 1;
3305       charptr = ecode;
3306       GETCHARLEN(fc, ecode, length);
3307       ecode += length;
3308
3309       /* Handle multibyte character matching specially here. There is
3310       support for caseless matching if UCP support is present. */
3311
3312       if (length > 1)
3313         {
3314 #ifdef SUPPORT_UCP
3315         pcre_uint32 othercase;
3316         if (op >= OP_STARI &&     /* Caseless */
3317             (othercase = UCD_OTHERCASE(fc)) != fc)
3318           oclength = PRIV(ord2utf)(othercase, occhars);
3319         else oclength = 0;
3320 #endif  /* SUPPORT_UCP */
3321
3322         for (i = 1; i <= min; i++)
3323           {
3324           if (eptr <= md->end_subject - length &&
3325             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3326 #ifdef SUPPORT_UCP
3327           else if (oclength > 0 &&
3328                    eptr <= md->end_subject - oclength &&
3329                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3330 #endif  /* SUPPORT_UCP */
3331           else
3332             {
3333             CHECK_PARTIAL();
3334             RRETURN(MATCH_NOMATCH);
3335             }
3336           }
3337
3338         if (min == max) continue;
3339
3340         if (minimize)
3341           {
3342           for (fi = min;; fi++)
3343             {
3344             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3345             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3346             if (fi >= max) RRETURN(MATCH_NOMATCH);
3347             if (eptr <= md->end_subject - length &&
3348               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3349 #ifdef SUPPORT_UCP
3350             else if (oclength > 0 &&
3351                      eptr <= md->end_subject - oclength &&
3352                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3353 #endif  /* SUPPORT_UCP */
3354             else
3355               {
3356               CHECK_PARTIAL();
3357               RRETURN(MATCH_NOMATCH);
3358               }
3359             }
3360           /* Control never gets here */
3361           }
3362
3363         else  /* Maximize */
3364           {
3365           pp = eptr;
3366           for (i = min; i < max; i++)
3367             {
3368             if (eptr <= md->end_subject - length &&
3369                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3370 #ifdef SUPPORT_UCP
3371             else if (oclength > 0 &&
3372                      eptr <= md->end_subject - oclength &&
3373                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3374 #endif  /* SUPPORT_UCP */
3375             else
3376               {
3377               CHECK_PARTIAL();
3378               break;
3379               }
3380             }
3381
3382           if (possessive) continue;
3383
3384           for(;;)
3385             {
3386             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3387             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3388             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3389 #ifdef SUPPORT_UCP
3390             eptr--;
3391             BACKCHAR(eptr);
3392 #else   /* without SUPPORT_UCP */
3393             eptr -= length;
3394 #endif  /* SUPPORT_UCP */
3395             }
3396           }
3397         /* Control never gets here */
3398         }
3399
3400       /* If the length of a UTF-8 character is 1, we fall through here, and
3401       obey the code as for non-UTF-8 characters below, though in this case the
3402       value of fc will always be < 128. */
3403       }
3404     else
3405 #endif  /* SUPPORT_UTF */
3406       /* When not in UTF-8 mode, load a single-byte character. */
3407       fc = *ecode++;
3408
3409     /* The value of fc at this point is always one character, though we may
3410     or may not be in UTF mode. The code is duplicated for the caseless and
3411     caseful cases, for speed, since matching characters is likely to be quite
3412     common. First, ensure the minimum number of matches are present. If min =
3413     max, continue at the same level without recursing. Otherwise, if
3414     minimizing, keep trying the rest of the expression and advancing one
3415     matching character if failing, up to the maximum. Alternatively, if
3416     maximizing, find the maximum number of characters and work backwards. */
3417
3418     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3419       max, (char *)eptr));
3420
3421     if (op >= OP_STARI)  /* Caseless */
3422       {
3423 #ifdef COMPILE_PCRE8
3424       /* fc must be < 128 if UTF is enabled. */
3425       foc = md->fcc[fc];
3426 #else
3427 #ifdef SUPPORT_UTF
3428 #ifdef SUPPORT_UCP
3429       if (utf && fc > 127)
3430         foc = UCD_OTHERCASE(fc);
3431 #else
3432       if (utf && fc > 127)
3433         foc = fc;
3434 #endif /* SUPPORT_UCP */
3435       else
3436 #endif /* SUPPORT_UTF */
3437         foc = TABLE_GET(fc, md->fcc, fc);
3438 #endif /* COMPILE_PCRE8 */
3439
3440       for (i = 1; i <= min; i++)
3441         {
3442         pcre_uchar cc;
3443
3444         if (eptr >= md->end_subject)
3445           {
3446           SCHECK_PARTIAL();
3447           RRETURN(MATCH_NOMATCH);
3448           }
3449         cc = RAWUCHARTEST(eptr);
3450         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3451         eptr++;
3452         }
3453       if (min == max) continue;
3454       if (minimize)
3455         {
3456         for (fi = min;; fi++)
3457           {
3458           pcre_uchar cc;
3459
3460           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3461           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462           if (fi >= max) RRETURN(MATCH_NOMATCH);
3463           if (eptr >= md->end_subject)
3464             {
3465             SCHECK_PARTIAL();
3466             RRETURN(MATCH_NOMATCH);
3467             }
3468           cc = RAWUCHARTEST(eptr);
3469           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3470           eptr++;
3471           }
3472         /* Control never gets here */
3473         }
3474       else  /* Maximize */
3475         {
3476         pp = eptr;
3477         for (i = min; i < max; i++)
3478           {
3479           pcre_uchar cc;
3480
3481           if (eptr >= md->end_subject)
3482             {
3483             SCHECK_PARTIAL();
3484             break;
3485             }
3486           cc = RAWUCHARTEST(eptr);
3487           if (fc != cc && foc != cc) break;
3488           eptr++;
3489           }
3490
3491         if (possessive) continue;
3492
3493         while (eptr >= pp)
3494           {
3495           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3496           eptr--;
3497           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3498           }
3499         RRETURN(MATCH_NOMATCH);
3500         }
3501       /* Control never gets here */
3502       }
3503
3504     /* Caseful comparisons (includes all multi-byte characters) */
3505
3506     else
3507       {
3508       for (i = 1; i <= min; i++)
3509         {
3510         if (eptr >= md->end_subject)
3511           {
3512           SCHECK_PARTIAL();
3513           RRETURN(MATCH_NOMATCH);
3514           }
3515         if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3516         }
3517
3518       if (min == max) continue;
3519
3520       if (minimize)
3521         {
3522         for (fi = min;; fi++)
3523           {
3524           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3525           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3526           if (fi >= max) RRETURN(MATCH_NOMATCH);
3527           if (eptr >= md->end_subject)
3528             {
3529             SCHECK_PARTIAL();
3530             RRETURN(MATCH_NOMATCH);
3531             }
3532           if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3533           }
3534         /* Control never gets here */
3535         }
3536       else  /* Maximize */
3537         {
3538         pp = eptr;
3539         for (i = min; i < max; i++)
3540           {
3541           if (eptr >= md->end_subject)
3542             {
3543             SCHECK_PARTIAL();
3544             break;
3545             }
3546           if (fc != RAWUCHARTEST(eptr)) break;
3547           eptr++;
3548           }
3549         if (possessive) continue;
3550
3551         while (eptr >= pp)
3552           {
3553           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3554           eptr--;
3555           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3556           }
3557         RRETURN(MATCH_NOMATCH);
3558         }
3559       }
3560     /* Control never gets here */
3561
3562     /* Match a negated single character. In UTF mode both the pattern
3563     character and the subject character can be multibyte. */
3564
3565     case OP_NOT:
3566     case OP_NOTI:
3567     if (eptr >= md->end_subject)
3568       {
3569       SCHECK_PARTIAL();
3570       RRETURN(MATCH_NOMATCH);
3571       }
3572 #ifdef SUPPORT_UTF
3573     if (utf)
3574       {
3575       register pcre_uint32 ch, och;
3576
3577       ecode++;
3578       GETCHARINC(ch, ecode);
3579       GETCHARINC(c, eptr);
3580
3581       if (op == OP_NOT)
3582         {
3583         if (ch == c) RRETURN(MATCH_NOMATCH);
3584         }
3585       else
3586         {
3587 #ifdef SUPPORT_UCP
3588         if (ch > 127)
3589           och = UCD_OTHERCASE(ch);
3590 #else
3591         if (ch > 127)
3592           och = ch;
3593 #endif /* SUPPORT_UCP */
3594         else
3595           och = TABLE_GET(ch, md->fcc, ch);
3596         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3597         }
3598       }
3599     else
3600 #endif
3601       {
3602       register pcre_uint32 ch = ecode[1];
3603       c = *eptr++;
3604       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3605         RRETURN(MATCH_NOMATCH);
3606       ecode += 2;
3607       }
3608     break;
3609
3610     /* Match a negated single character repeatedly. This is almost a
3611     repeat of the code for a repeated single character, but I haven't found a
3612     nice way of commoning these up that doesn't require a test of the
3613     positive/negative option for each character match. Maybe that wouldn't add
3614     very much to the time taken, but character matching *is* what this is all
3615     about... */
3616
3617     case OP_NOTEXACT:
3618     case OP_NOTEXACTI:
3619     min = max = GET2(ecode, 1);
3620     ecode += 1 + IMM2_SIZE;
3621     goto REPEATNOTCHAR;
3622
3623     case OP_NOTUPTO:
3624     case OP_NOTUPTOI:
3625     case OP_NOTMINUPTO:
3626     case OP_NOTMINUPTOI:
3627     min = 0;
3628     max = GET2(ecode, 1);
3629     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3630     ecode += 1 + IMM2_SIZE;
3631     goto REPEATNOTCHAR;
3632
3633     case OP_NOTPOSSTAR:
3634     case OP_NOTPOSSTARI:
3635     possessive = TRUE;
3636     min = 0;
3637     max = INT_MAX;
3638     ecode++;
3639     goto REPEATNOTCHAR;
3640
3641     case OP_NOTPOSPLUS:
3642     case OP_NOTPOSPLUSI:
3643     possessive = TRUE;
3644     min = 1;
3645     max = INT_MAX;
3646     ecode++;
3647     goto REPEATNOTCHAR;
3648
3649     case OP_NOTPOSQUERY:
3650     case OP_NOTPOSQUERYI:
3651     possessive = TRUE;
3652     min = 0;
3653     max = 1;
3654     ecode++;
3655     goto REPEATNOTCHAR;
3656
3657     case OP_NOTPOSUPTO:
3658     case OP_NOTPOSUPTOI:
3659     possessive = TRUE;
3660     min = 0;
3661     max = GET2(ecode, 1);
3662     ecode += 1 + IMM2_SIZE;
3663     goto REPEATNOTCHAR;
3664
3665     case OP_NOTSTAR:
3666     case OP_NOTSTARI:
3667     case OP_NOTMINSTAR:
3668     case OP_NOTMINSTARI:
3669     case OP_NOTPLUS:
3670     case OP_NOTPLUSI:
3671     case OP_NOTMINPLUS:
3672     case OP_NOTMINPLUSI:
3673     case OP_NOTQUERY:
3674     case OP_NOTQUERYI:
3675     case OP_NOTMINQUERY:
3676     case OP_NOTMINQUERYI:
3677     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3678     minimize = (c & 1) != 0;
3679     min = rep_min[c];                 /* Pick up values from tables; */
3680     max = rep_max[c];                 /* zero for max => infinity */
3681     if (max == 0) max = INT_MAX;
3682
3683     /* Common code for all repeated negated single-character matches. */
3684
3685     REPEATNOTCHAR:
3686     GETCHARINCTEST(fc, ecode);
3687
3688     /* The code is duplicated for the caseless and caseful cases, for speed,
3689     since matching characters is likely to be quite common. First, ensure the
3690     minimum number of matches are present. If min = max, continue at the same
3691     level without recursing. Otherwise, if minimizing, keep trying the rest of
3692     the expression and advancing one matching character if failing, up to the
3693     maximum. Alternatively, if maximizing, find the maximum number of
3694     characters and work backwards. */
3695
3696     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3697       max, (char *)eptr));
3698
3699     if (op >= OP_NOTSTARI)     /* Caseless */
3700       {
3701 #ifdef SUPPORT_UTF
3702 #ifdef SUPPORT_UCP
3703       if (utf && fc > 127)
3704         foc = UCD_OTHERCASE(fc);
3705 #else
3706       if (utf && fc > 127)
3707         foc = fc;
3708 #endif /* SUPPORT_UCP */
3709       else
3710 #endif /* SUPPORT_UTF */
3711         foc = TABLE_GET(fc, md->fcc, fc);
3712
3713 #ifdef SUPPORT_UTF
3714       if (utf)
3715         {
3716         register pcre_uint32 d;
3717         for (i = 1; i <= min; i++)
3718           {
3719           if (eptr >= md->end_subject)
3720             {
3721             SCHECK_PARTIAL();
3722             RRETURN(MATCH_NOMATCH);
3723             }
3724           GETCHARINC(d, eptr);
3725           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3726           }
3727         }
3728       else
3729 #endif
3730       /* Not UTF mode */
3731         {
3732         for (i = 1; i <= min; i++)
3733           {
3734           if (eptr >= md->end_subject)
3735             {
3736             SCHECK_PARTIAL();
3737             RRETURN(MATCH_NOMATCH);
3738             }
3739           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3740           eptr++;
3741           }
3742         }
3743
3744       if (min == max) continue;
3745
3746       if (minimize)
3747         {
3748 #ifdef SUPPORT_UTF
3749         if (utf)
3750           {
3751           register pcre_uint32 d;
3752           for (fi = min;; fi++)
3753             {
3754             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3755             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3756             if (fi >= max) RRETURN(MATCH_NOMATCH);
3757             if (eptr >= md->end_subject)
3758               {
3759               SCHECK_PARTIAL();
3760               RRETURN(MATCH_NOMATCH);
3761               }
3762             GETCHARINC(d, eptr);
3763             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3764             }
3765           }
3766         else
3767 #endif
3768         /* Not UTF mode */
3769           {
3770           for (fi = min;; fi++)
3771             {
3772             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3773             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3774             if (fi >= max) RRETURN(MATCH_NOMATCH);
3775             if (eptr >= md->end_subject)
3776               {
3777               SCHECK_PARTIAL();
3778               RRETURN(MATCH_NOMATCH);
3779               }
3780             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3781             eptr++;
3782             }
3783           }
3784         /* Control never gets here */
3785         }
3786
3787       /* Maximize case */
3788
3789       else
3790         {
3791         pp = eptr;
3792
3793 #ifdef SUPPORT_UTF
3794         if (utf)
3795           {
3796           register pcre_uint32 d;
3797           for (i = min; i < max; i++)
3798             {
3799             int len = 1;
3800             if (eptr >= md->end_subject)
3801               {
3802               SCHECK_PARTIAL();
3803               break;
3804               }
3805             GETCHARLEN(d, eptr, len);
3806             if (fc == d || (unsigned int)foc == d) break;
3807             eptr += len;
3808             }
3809           if (possessive) continue;
3810           for(;;)
3811             {
3812             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3813             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3814             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3815             BACKCHAR(eptr);
3816             }
3817           }
3818         else
3819 #endif
3820         /* Not UTF mode */
3821           {
3822           for (i = min; i < max; i++)
3823             {
3824             if (eptr >= md->end_subject)
3825               {
3826               SCHECK_PARTIAL();
3827               break;
3828               }
3829             if (fc == *eptr || foc == *eptr) break;
3830             eptr++;
3831             }
3832           if (possessive) continue;
3833           while (eptr >= pp)
3834             {
3835             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3836             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3837             eptr--;
3838             }
3839           }
3840
3841         RRETURN(MATCH_NOMATCH);
3842         }
3843       /* Control never gets here */
3844       }
3845
3846     /* Caseful comparisons */
3847
3848     else
3849       {
3850 #ifdef SUPPORT_UTF
3851       if (utf)
3852         {
3853         register pcre_uint32 d;
3854         for (i = 1; i <= min; i++)
3855           {
3856           if (eptr >= md->end_subject)
3857             {
3858             SCHECK_PARTIAL();
3859             RRETURN(MATCH_NOMATCH);
3860             }
3861           GETCHARINC(d, eptr);
3862           if (fc == d) RRETURN(MATCH_NOMATCH);
3863           }
3864         }
3865       else
3866 #endif
3867       /* Not UTF mode */
3868         {
3869         for (i = 1; i <= min; i++)
3870           {
3871           if (eptr >= md->end_subject)
3872             {
3873             SCHECK_PARTIAL();
3874             RRETURN(MATCH_NOMATCH);
3875             }
3876           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3877           }
3878         }
3879
3880       if (min == max) continue;
3881
3882       if (minimize)
3883         {
3884 #ifdef SUPPORT_UTF
3885         if (utf)
3886           {
3887           register pcre_uint32 d;
3888           for (fi = min;; fi++)
3889             {
3890             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3891             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3892             if (fi >= max) RRETURN(MATCH_NOMATCH);
3893             if (eptr >= md->end_subject)
3894               {
3895               SCHECK_PARTIAL();
3896               RRETURN(MATCH_NOMATCH);
3897               }
3898             GETCHARINC(d, eptr);
3899             if (fc == d) RRETURN(MATCH_NOMATCH);
3900             }
3901           }
3902         else
3903 #endif
3904         /* Not UTF mode */
3905           {
3906           for (fi = min;; fi++)
3907             {
3908             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3909             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910             if (fi >= max) RRETURN(MATCH_NOMATCH);
3911             if (eptr >= md->end_subject)
3912               {
3913               SCHECK_PARTIAL();
3914               RRETURN(MATCH_NOMATCH);
3915               }
3916             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3917             }
3918           }
3919         /* Control never gets here */
3920         }
3921
3922       /* Maximize case */
3923
3924       else
3925         {
3926         pp = eptr;
3927
3928 #ifdef SUPPORT_UTF
3929         if (utf)
3930           {
3931           register pcre_uint32 d;
3932           for (i = min; i < max; i++)
3933             {
3934             int len = 1;
3935             if (eptr >= md->end_subject)
3936               {
3937               SCHECK_PARTIAL();
3938               break;
3939               }
3940             GETCHARLEN(d, eptr, len);
3941             if (fc == d) break;
3942             eptr += len;
3943             }
3944           if (possessive) continue;
3945           for(;;)
3946             {
3947             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3948             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3949             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3950             BACKCHAR(eptr);
3951             }
3952           }
3953         else
3954 #endif
3955         /* Not UTF mode */
3956           {
3957           for (i = min; i < max; i++)
3958             {
3959             if (eptr >= md->end_subject)
3960               {
3961               SCHECK_PARTIAL();
3962               break;
3963               }
3964             if (fc == *eptr) break;
3965             eptr++;
3966             }
3967           if (possessive) continue;
3968           while (eptr >= pp)
3969             {
3970             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3971             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3972             eptr--;
3973             }
3974           }
3975
3976         RRETURN(MATCH_NOMATCH);
3977         }
3978       }
3979     /* Control never gets here */
3980
3981     /* Match a single character type repeatedly; several different opcodes
3982     share code. This is very similar to the code for single characters, but we
3983     repeat it in the interests of efficiency. */
3984
3985     case OP_TYPEEXACT:
3986     min = max = GET2(ecode, 1);
3987     minimize = TRUE;
3988     ecode += 1 + IMM2_SIZE;
3989     goto REPEATTYPE;
3990
3991     case OP_TYPEUPTO:
3992     case OP_TYPEMINUPTO:
3993     min = 0;
3994     max = GET2(ecode, 1);
3995     minimize = *ecode == OP_TYPEMINUPTO;
3996     ecode += 1 + IMM2_SIZE;
3997     goto REPEATTYPE;
3998
3999     case OP_TYPEPOSSTAR:
4000     possessive = TRUE;
4001     min = 0;
4002     max = INT_MAX;
4003     ecode++;
4004     goto REPEATTYPE;
4005
4006     case OP_TYPEPOSPLUS:
4007     possessive = TRUE;
4008     min = 1;
4009     max = INT_MAX;
4010     ecode++;
4011     goto REPEATTYPE;
4012
4013     case OP_TYPEPOSQUERY:
4014     possessive = TRUE;
4015     min = 0;
4016     max = 1;
4017     ecode++;
4018     goto REPEATTYPE;
4019
4020     case OP_TYPEPOSUPTO:
4021     possessive = TRUE;
4022     min = 0;
4023     max = GET2(ecode, 1);
4024     ecode += 1 + IMM2_SIZE;
4025     goto REPEATTYPE;
4026
4027     case OP_TYPESTAR:
4028     case OP_TYPEMINSTAR:
4029     case OP_TYPEPLUS:
4030     case OP_TYPEMINPLUS:
4031     case OP_TYPEQUERY:
4032     case OP_TYPEMINQUERY:
4033     c = *ecode++ - OP_TYPESTAR;
4034     minimize = (c & 1) != 0;
4035     min = rep_min[c];                 /* Pick up values from tables; */
4036     max = rep_max[c];                 /* zero for max => infinity */
4037     if (max == 0) max = INT_MAX;
4038
4039     /* Common code for all repeated single character type matches. Note that
4040     in UTF-8 mode, '.' matches a character of any length, but for the other
4041     character types, the valid characters are all one-byte long. */
4042
4043     REPEATTYPE:
4044     ctype = *ecode++;      /* Code for the character type */
4045
4046 #ifdef SUPPORT_UCP
4047     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4048       {
4049       prop_fail_result = ctype == OP_NOTPROP;
4050       prop_type = *ecode++;
4051       prop_value = *ecode++;
4052       }
4053     else prop_type = -1;
4054 #endif
4055
4056     /* First, ensure the minimum number of matches are present. Use inline
4057     code for maximizing the speed, and do the type test once at the start
4058     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4059     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4060     and single-bytes. */
4061
4062     if (min > 0)
4063       {
4064 #ifdef SUPPORT_UCP
4065       if (prop_type >= 0)
4066         {
4067         switch(prop_type)
4068           {
4069           case PT_ANY:
4070           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4071           for (i = 1; i <= min; i++)
4072             {
4073             if (eptr >= md->end_subject)
4074               {
4075               SCHECK_PARTIAL();
4076               RRETURN(MATCH_NOMATCH);
4077               }
4078             GETCHARINCTEST(c, eptr);
4079             }
4080           break;
4081
4082           case PT_LAMP:
4083           for (i = 1; i <= min; i++)
4084             {
4085             int chartype;
4086             if (eptr >= md->end_subject)
4087               {
4088               SCHECK_PARTIAL();
4089               RRETURN(MATCH_NOMATCH);
4090               }
4091             GETCHARINCTEST(c, eptr);
4092             chartype = UCD_CHARTYPE(c);
4093             if ((chartype == ucp_Lu ||
4094                  chartype == ucp_Ll ||
4095                  chartype == ucp_Lt) == prop_fail_result)
4096               RRETURN(MATCH_NOMATCH);
4097             }
4098           break;
4099
4100           case PT_GC:
4101           for (i = 1; i <= min; i++)
4102             {
4103             if (eptr >= md->end_subject)
4104               {
4105               SCHECK_PARTIAL();
4106               RRETURN(MATCH_NOMATCH);
4107               }
4108             GETCHARINCTEST(c, eptr);
4109             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4110               RRETURN(MATCH_NOMATCH);
4111             }
4112           break;
4113
4114           case PT_PC:
4115           for (i = 1; i <= min; i++)
4116             {
4117             if (eptr >= md->end_subject)
4118               {
4119               SCHECK_PARTIAL();
4120               RRETURN(MATCH_NOMATCH);
4121               }
4122             GETCHARINCTEST(c, eptr);
4123             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4124               RRETURN(MATCH_NOMATCH);
4125             }
4126           break;
4127
4128           case PT_SC:
4129           for (i = 1; i <= min; i++)
4130             {
4131             if (eptr >= md->end_subject)
4132               {
4133               SCHECK_PARTIAL();
4134               RRETURN(MATCH_NOMATCH);
4135               }
4136             GETCHARINCTEST(c, eptr);
4137             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4138               RRETURN(MATCH_NOMATCH);
4139             }
4140           break;
4141
4142           case PT_ALNUM:
4143           for (i = 1; i <= min; i++)
4144             {
4145             int category;
4146             if (eptr >= md->end_subject)
4147               {
4148               SCHECK_PARTIAL();
4149               RRETURN(MATCH_NOMATCH);
4150               }
4151             GETCHARINCTEST(c, eptr);
4152             category = UCD_CATEGORY(c);
4153             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4154               RRETURN(MATCH_NOMATCH);
4155             }
4156           break;
4157
4158           case PT_SPACE:    /* Perl space */
4159           for (i = 1; i <= min; i++)
4160             {
4161             if (eptr >= md->end_subject)
4162               {
4163               SCHECK_PARTIAL();
4164               RRETURN(MATCH_NOMATCH);
4165               }
4166             GETCHARINCTEST(c, eptr);
4167             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4168                  c == CHAR_FF || c == CHAR_CR)
4169                    == prop_fail_result)
4170               RRETURN(MATCH_NOMATCH);
4171             }
4172           break;
4173
4174           case PT_PXSPACE:  /* POSIX space */
4175           for (i = 1; i <= min; i++)
4176             {
4177             if (eptr >= md->end_subject)
4178               {
4179               SCHECK_PARTIAL();
4180               RRETURN(MATCH_NOMATCH);
4181               }
4182             GETCHARINCTEST(c, eptr);
4183             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4184                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4185                    == prop_fail_result)
4186               RRETURN(MATCH_NOMATCH);
4187             }
4188           break;
4189
4190           case PT_WORD:
4191           for (i = 1; i <= min; i++)
4192             {
4193             int category;
4194             if (eptr >= md->end_subject)
4195               {
4196               SCHECK_PARTIAL();
4197               RRETURN(MATCH_NOMATCH);
4198               }
4199             GETCHARINCTEST(c, eptr);
4200             category = UCD_CATEGORY(c);
4201             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4202                    == prop_fail_result)
4203               RRETURN(MATCH_NOMATCH);
4204             }
4205           break;
4206
4207           case PT_CLIST:
4208           for (i = 1; i <= min; i++)
4209             {
4210             const pcre_uint32 *cp;
4211             if (eptr >= md->end_subject)
4212               {
4213               SCHECK_PARTIAL();
4214               RRETURN(MATCH_NOMATCH);
4215               }
4216             GETCHARINCTEST(c, eptr);
4217             cp = PRIV(ucd_caseless_sets) + prop_value;
4218             for (;;)
4219               {
4220               if (c < *cp)
4221                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4222               if (c == *cp++)
4223                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4224               }
4225             }
4226           break;
4227
4228           /* This should not occur */
4229
4230           default:
4231           RRETURN(PCRE_ERROR_INTERNAL);
4232           }
4233         }
4234
4235       /* Match extended Unicode sequences. We will get here only if the
4236       support is in the binary; otherwise a compile-time error occurs. */
4237
4238       else if (ctype == OP_EXTUNI)
4239         {
4240         for (i = 1; i <= min; i++)
4241           {
4242           if (eptr >= md->end_subject)
4243             {
4244             SCHECK_PARTIAL();
4245             RRETURN(MATCH_NOMATCH);
4246             }
4247           else
4248             {
4249             int lgb, rgb;
4250             GETCHARINCTEST(c, eptr);
4251             lgb = UCD_GRAPHBREAK(c);
4252            while (eptr < md->end_subject)
4253               {
4254               int len = 1;
4255               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4256               rgb = UCD_GRAPHBREAK(c);
4257               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4258               lgb = rgb;
4259               eptr += len;
4260               }
4261             }
4262           CHECK_PARTIAL();
4263           }
4264         }
4265
4266       else
4267 #endif     /* SUPPORT_UCP */
4268
4269 /* Handle all other cases when the coding is UTF-8 */
4270
4271 #ifdef SUPPORT_UTF
4272       if (utf) switch(ctype)
4273         {
4274         case OP_ANY:
4275         for (i = 1; i <= min; i++)
4276           {
4277           if (eptr >= md->end_subject)
4278             {
4279             SCHECK_PARTIAL();
4280             RRETURN(MATCH_NOMATCH);
4281             }
4282           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4283           if (md->partial != 0 &&
4284               eptr + 1 >= md->end_subject &&
4285               NLBLOCK->nltype == NLTYPE_FIXED &&
4286               NLBLOCK->nllen == 2 &&
4287               RAWUCHAR(eptr) == NLBLOCK->nl[0])
4288             {
4289             md->hitend = TRUE;
4290             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4291             }
4292           eptr++;
4293           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4294           }
4295         break;
4296
4297         case OP_ALLANY:
4298         for (i = 1; i <= min; i++)
4299           {
4300           if (eptr >= md->end_subject)
4301             {
4302             SCHECK_PARTIAL();
4303             RRETURN(MATCH_NOMATCH);
4304             }
4305           eptr++;
4306           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4307           }
4308         break;
4309
4310         case OP_ANYBYTE:
4311         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4312         eptr += min;
4313         break;
4314
4315         case OP_ANYNL:
4316         for (i = 1; i <= min; i++)
4317           {
4318           if (eptr >= md->end_subject)
4319             {
4320             SCHECK_PARTIAL();
4321             RRETURN(MATCH_NOMATCH);
4322             }
4323           GETCHARINC(c, eptr);
4324           switch(c)
4325             {
4326             default: RRETURN(MATCH_NOMATCH);
4327
4328             case CHAR_CR:
4329             if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
4330             break;
4331
4332             case CHAR_LF:
4333             break;
4334
4335             case CHAR_VT:
4336             case CHAR_FF:
4337             case CHAR_NEL:
4338 #ifndef EBCDIC
4339             case 0x2028:
4340             case 0x2029:
4341 #endif  /* Not EBCDIC */
4342             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4343             break;
4344             }
4345           }
4346         break;
4347
4348         case OP_NOT_HSPACE:
4349         for (i = 1; i <= min; i++)
4350           {
4351           if (eptr >= md->end_subject)
4352             {
4353             SCHECK_PARTIAL();
4354             RRETURN(MATCH_NOMATCH);
4355             }
4356           GETCHARINC(c, eptr);
4357           switch(c)
4358             {
4359             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4360             default: break;
4361             }
4362           }
4363         break;
4364
4365         case OP_HSPACE:
4366         for (i = 1; i <= min; i++)
4367           {
4368           if (eptr >= md->end_subject)
4369             {
4370             SCHECK_PARTIAL();
4371             RRETURN(MATCH_NOMATCH);
4372             }
4373           GETCHARINC(c, eptr);
4374           switch(c)
4375             {
4376             HSPACE_CASES: break;  /* Byte and multibyte cases */
4377             default: RRETURN(MATCH_NOMATCH);
4378             }
4379           }
4380         break;
4381
4382         case OP_NOT_VSPACE:
4383         for (i = 1; i <= min; i++)
4384           {
4385           if (eptr >= md->end_subject)
4386             {
4387             SCHECK_PARTIAL();
4388             RRETURN(MATCH_NOMATCH);
4389             }
4390           GETCHARINC(c, eptr);
4391           switch(c)
4392             {
4393             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4394             default: break;
4395             }
4396           }
4397         break;
4398
4399         case OP_VSPACE:
4400         for (i = 1; i <= min; i++)
4401           {
4402           if (eptr >= md->end_subject)
4403             {
4404             SCHECK_PARTIAL();
4405             RRETURN(MATCH_NOMATCH);
4406             }
4407           GETCHARINC(c, eptr);
4408           switch(c)
4409             {
4410             VSPACE_CASES: break;
4411             default: RRETURN(MATCH_NOMATCH);
4412             }
4413           }
4414         break;
4415
4416         case OP_NOT_DIGIT:
4417         for (i = 1; i <= min; i++)
4418           {
4419           if (eptr >= md->end_subject)
4420             {
4421             SCHECK_PARTIAL();
4422             RRETURN(MATCH_NOMATCH);
4423             }
4424           GETCHARINC(c, eptr);
4425           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4426             RRETURN(MATCH_NOMATCH);
4427           }
4428         break;
4429
4430         case OP_DIGIT:
4431         for (i = 1; i <= min; i++)
4432           {
4433           pcre_uchar cc;
4434
4435           if (eptr >= md->end_subject)
4436             {
4437             SCHECK_PARTIAL();
4438             RRETURN(MATCH_NOMATCH);
4439             }
4440           cc = RAWUCHAR(eptr);
4441           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4442             RRETURN(MATCH_NOMATCH);
4443           eptr++;
4444           /* No need to skip more bytes - we know it's a 1-byte character */
4445           }
4446         break;
4447
4448         case OP_NOT_WHITESPACE:
4449         for (i = 1; i <= min; i++)
4450           {
4451           pcre_uchar cc;
4452
4453           if (eptr >= md->end_subject)
4454             {
4455             SCHECK_PARTIAL();
4456             RRETURN(MATCH_NOMATCH);
4457             }
4458           cc = RAWUCHAR(eptr);
4459           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4460             RRETURN(MATCH_NOMATCH);
4461           eptr++;
4462           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4463           }
4464         break;
4465
4466         case OP_WHITESPACE:
4467         for (i = 1; i <= min; i++)
4468           {
4469           pcre_uchar cc;
4470
4471           if (eptr >= md->end_subject)
4472             {
4473             SCHECK_PARTIAL();
4474             RRETURN(MATCH_NOMATCH);
4475             }
4476           cc = RAWUCHAR(eptr);
4477           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4478             RRETURN(MATCH_NOMATCH);
4479           eptr++;
4480           /* No need to skip more bytes - we know it's a 1-byte character */
4481           }
4482         break;
4483
4484         case OP_NOT_WORDCHAR:
4485         for (i = 1; i <= min; i++)
4486           {
4487           pcre_uchar cc;
4488
4489           if (eptr >= md->end_subject)
4490             {
4491             SCHECK_PARTIAL();
4492             RRETURN(MATCH_NOMATCH);
4493             }
4494           cc = RAWUCHAR(eptr);
4495           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4496             RRETURN(MATCH_NOMATCH);
4497           eptr++;
4498           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4499           }
4500         break;
4501
4502         case OP_WORDCHAR:
4503         for (i = 1; i <= min; i++)
4504           {
4505           pcre_uchar cc;
4506
4507           if (eptr >= md->end_subject)
4508             {
4509             SCHECK_PARTIAL();
4510             RRETURN(MATCH_NOMATCH);
4511             }
4512           cc = RAWUCHAR(eptr);
4513           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4514             RRETURN(MATCH_NOMATCH);
4515           eptr++;
4516           /* No need to skip more bytes - we know it's a 1-byte character */
4517           }
4518         break;
4519
4520         default:
4521         RRETURN(PCRE_ERROR_INTERNAL);
4522         }  /* End switch(ctype) */
4523
4524       else
4525 #endif     /* SUPPORT_UTF */
4526
4527       /* Code for the non-UTF-8 case for minimum matching of operators other
4528       than OP_PROP and OP_NOTPROP. */
4529
4530       switch(ctype)
4531         {
4532         case OP_ANY:
4533         for (i = 1; i <= min; i++)
4534           {
4535           if (eptr >= md->end_subject)
4536             {
4537             SCHECK_PARTIAL();
4538             RRETURN(MATCH_NOMATCH);
4539             }
4540           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4541           if (md->partial != 0 &&
4542               eptr + 1 >= md->end_subject &&
4543               NLBLOCK->nltype == NLTYPE_FIXED &&
4544               NLBLOCK->nllen == 2 &&
4545               *eptr == NLBLOCK->nl[0])
4546             {
4547             md->hitend = TRUE;
4548             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4549             }
4550           eptr++;
4551           }
4552         break;
4553
4554         case OP_ALLANY:
4555         if (eptr > md->end_subject - min)
4556           {
4557           SCHECK_PARTIAL();
4558           RRETURN(MATCH_NOMATCH);
4559           }
4560         eptr += min;
4561         break;
4562
4563         case OP_ANYBYTE:
4564         if (eptr > md->end_subject - min)
4565           {
4566           SCHECK_PARTIAL();
4567           RRETURN(MATCH_NOMATCH);
4568           }
4569         eptr += min;
4570         break;
4571
4572         case OP_ANYNL:
4573         for (i = 1; i <= min; i++)
4574           {
4575           if (eptr >= md->end_subject)
4576             {
4577             SCHECK_PARTIAL();
4578             RRETURN(MATCH_NOMATCH);
4579             }
4580           switch(*eptr++)
4581             {
4582             default: RRETURN(MATCH_NOMATCH);
4583
4584             case CHAR_CR:
4585             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4586             break;
4587
4588             case CHAR_LF:
4589             break;
4590
4591             case CHAR_VT:
4592             case CHAR_FF:
4593             case CHAR_NEL:
4594 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4595             case 0x2028:
4596             case 0x2029:
4597 #endif
4598             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4599             break;
4600             }
4601           }
4602         break;
4603
4604         case OP_NOT_HSPACE:
4605         for (i = 1; i <= min; i++)
4606           {
4607           if (eptr >= md->end_subject)
4608             {
4609             SCHECK_PARTIAL();
4610             RRETURN(MATCH_NOMATCH);
4611             }
4612           switch(*eptr++)
4613             {
4614             default: break;
4615             HSPACE_BYTE_CASES:
4616 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4617             HSPACE_MULTIBYTE_CASES:
4618 #endif
4619             RRETURN(MATCH_NOMATCH);
4620             }
4621           }
4622         break;
4623
4624         case OP_HSPACE:
4625         for (i = 1; i <= min; i++)
4626           {
4627           if (eptr >= md->end_subject)
4628             {
4629             SCHECK_PARTIAL();
4630             RRETURN(MATCH_NOMATCH);
4631             }
4632           switch(*eptr++)
4633             {
4634             default: RRETURN(MATCH_NOMATCH);
4635             HSPACE_BYTE_CASES:
4636 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4637             HSPACE_MULTIBYTE_CASES:
4638 #endif
4639             break;
4640             }
4641           }
4642         break;
4643
4644         case OP_NOT_VSPACE:
4645         for (i = 1; i <= min; i++)
4646           {
4647           if (eptr >= md->end_subject)
4648             {
4649             SCHECK_PARTIAL();
4650             RRETURN(MATCH_NOMATCH);
4651             }
4652           switch(*eptr++)
4653             {
4654             VSPACE_BYTE_CASES:
4655 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4656             VSPACE_MULTIBYTE_CASES:
4657 #endif
4658             RRETURN(MATCH_NOMATCH);
4659             default: break;
4660             }
4661           }
4662         break;
4663
4664         case OP_VSPACE:
4665         for (i = 1; i <= min; i++)
4666           {
4667           if (eptr >= md->end_subject)
4668             {
4669             SCHECK_PARTIAL();
4670             RRETURN(MATCH_NOMATCH);
4671             }
4672           switch(*eptr++)
4673             {
4674             default: RRETURN(MATCH_NOMATCH);
4675             VSPACE_BYTE_CASES:
4676 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4677             VSPACE_MULTIBYTE_CASES:
4678 #endif
4679             break;
4680             }
4681           }
4682         break;
4683
4684         case OP_NOT_DIGIT:
4685         for (i = 1; i <= min; i++)
4686           {
4687           if (eptr >= md->end_subject)
4688             {
4689             SCHECK_PARTIAL();
4690             RRETURN(MATCH_NOMATCH);
4691             }
4692           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4693             RRETURN(MATCH_NOMATCH);
4694           eptr++;
4695           }
4696         break;
4697
4698         case OP_DIGIT:
4699         for (i = 1; i <= min; i++)
4700           {
4701           if (eptr >= md->end_subject)
4702             {
4703             SCHECK_PARTIAL();
4704             RRETURN(MATCH_NOMATCH);
4705             }
4706           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4707             RRETURN(MATCH_NOMATCH);
4708           eptr++;
4709           }
4710         break;
4711
4712         case OP_NOT_WHITESPACE:
4713         for (i = 1; i <= min; i++)
4714           {
4715           if (eptr >= md->end_subject)
4716             {
4717             SCHECK_PARTIAL();
4718             RRETURN(MATCH_NOMATCH);
4719             }
4720           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4721             RRETURN(MATCH_NOMATCH);
4722           eptr++;
4723           }
4724         break;
4725
4726         case OP_WHITESPACE:
4727         for (i = 1; i <= min; i++)
4728           {
4729           if (eptr >= md->end_subject)
4730             {
4731             SCHECK_PARTIAL();
4732             RRETURN(MATCH_NOMATCH);
4733             }
4734           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4735             RRETURN(MATCH_NOMATCH);
4736           eptr++;
4737           }
4738         break;
4739
4740         case OP_NOT_WORDCHAR:
4741         for (i = 1; i <= min; i++)
4742           {
4743           if (eptr >= md->end_subject)
4744             {
4745             SCHECK_PARTIAL();
4746             RRETURN(MATCH_NOMATCH);
4747             }
4748           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4749             RRETURN(MATCH_NOMATCH);
4750           eptr++;
4751           }
4752         break;
4753
4754         case OP_WORDCHAR:
4755         for (i = 1; i <= min; i++)
4756           {
4757           if (eptr >= md->end_subject)
4758             {
4759             SCHECK_PARTIAL();
4760             RRETURN(MATCH_NOMATCH);
4761             }
4762           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4763             RRETURN(MATCH_NOMATCH);
4764           eptr++;
4765           }
4766         break;
4767
4768         default:
4769         RRETURN(PCRE_ERROR_INTERNAL);
4770         }
4771       }
4772
4773     /* If min = max, continue at the same level without recursing */
4774
4775     if (min == max) continue;
4776
4777     /* If minimizing, we have to test the rest of the pattern before each
4778     subsequent match. Again, separate the UTF-8 case for speed, and also
4779     separate the UCP cases. */
4780
4781     if (minimize)
4782       {
4783 #ifdef SUPPORT_UCP
4784       if (prop_type >= 0)
4785         {
4786         switch(prop_type)
4787           {
4788           case PT_ANY:
4789           for (fi = min;; fi++)
4790             {
4791             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4792             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4793             if (fi >= max) RRETURN(MATCH_NOMATCH);
4794             if (eptr >= md->end_subject)
4795               {
4796               SCHECK_PARTIAL();
4797               RRETURN(MATCH_NOMATCH);
4798               }
4799             GETCHARINCTEST(c, eptr);
4800             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4801             }
4802           /* Control never gets here */
4803
4804           case PT_LAMP:
4805           for (fi = min;; fi++)
4806             {
4807             int chartype;
4808             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4809             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4810             if (fi >= max) RRETURN(MATCH_NOMATCH);
4811             if (eptr >= md->end_subject)
4812               {
4813               SCHECK_PARTIAL();
4814               RRETURN(MATCH_NOMATCH);
4815               }
4816             GETCHARINCTEST(c, eptr);
4817             chartype = UCD_CHARTYPE(c);
4818             if ((chartype == ucp_Lu ||
4819                  chartype == ucp_Ll ||
4820                  chartype == ucp_Lt) == prop_fail_result)
4821               RRETURN(MATCH_NOMATCH);
4822             }
4823           /* Control never gets here */
4824
4825           case PT_GC:
4826           for (fi = min;; fi++)
4827             {
4828             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4829             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4830             if (fi >= max) RRETURN(MATCH_NOMATCH);
4831             if (eptr >= md->end_subject)
4832               {
4833               SCHECK_PARTIAL();
4834               RRETURN(MATCH_NOMATCH);
4835               }
4836             GETCHARINCTEST(c, eptr);
4837             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4838               RRETURN(MATCH_NOMATCH);
4839             }
4840           /* Control never gets here */
4841
4842           case PT_PC:
4843           for (fi = min;; fi++)
4844             {
4845             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4846             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4847             if (fi >= max) RRETURN(MATCH_NOMATCH);
4848             if (eptr >= md->end_subject)
4849               {
4850               SCHECK_PARTIAL();
4851               RRETURN(MATCH_NOMATCH);
4852               }
4853             GETCHARINCTEST(c, eptr);
4854             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4855               RRETURN(MATCH_NOMATCH);
4856             }
4857           /* Control never gets here */
4858
4859           case PT_SC:
4860           for (fi = min;; fi++)
4861             {
4862             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4863             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4864             if (fi >= max) RRETURN(MATCH_NOMATCH);
4865             if (eptr >= md->end_subject)
4866               {
4867               SCHECK_PARTIAL();
4868               RRETURN(MATCH_NOMATCH);
4869               }
4870             GETCHARINCTEST(c, eptr);
4871             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4872               RRETURN(MATCH_NOMATCH);
4873             }
4874           /* Control never gets here */
4875
4876           case PT_ALNUM:
4877           for (fi = min;; fi++)
4878             {
4879             int category;
4880             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4881             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4882             if (fi >= max) RRETURN(MATCH_NOMATCH);
4883             if (eptr >= md->end_subject)
4884               {
4885               SCHECK_PARTIAL();
4886               RRETURN(MATCH_NOMATCH);
4887               }
4888             GETCHARINCTEST(c, eptr);
4889             category = UCD_CATEGORY(c);
4890             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4891               RRETURN(MATCH_NOMATCH);
4892             }
4893           /* Control never gets here */
4894
4895           case PT_SPACE:    /* Perl space */
4896           for (fi = min;; fi++)
4897             {
4898             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4899             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4900             if (fi >= max) RRETURN(MATCH_NOMATCH);
4901             if (eptr >= md->end_subject)
4902               {
4903               SCHECK_PARTIAL();
4904               RRETURN(MATCH_NOMATCH);
4905               }
4906             GETCHARINCTEST(c, eptr);
4907             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4908                  c == CHAR_FF || c == CHAR_CR)
4909                    == prop_fail_result)
4910               RRETURN(MATCH_NOMATCH);
4911             }
4912           /* Control never gets here */
4913
4914           case PT_PXSPACE:  /* POSIX space */
4915           for (fi = min;; fi++)
4916             {
4917             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4918             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4919             if (fi >= max) RRETURN(MATCH_NOMATCH);
4920             if (eptr >= md->end_subject)
4921               {
4922               SCHECK_PARTIAL();
4923               RRETURN(MATCH_NOMATCH);
4924               }
4925             GETCHARINCTEST(c, eptr);
4926             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4927                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4928                    == prop_fail_result)
4929               RRETURN(MATCH_NOMATCH);
4930             }
4931           /* Control never gets here */
4932
4933           case PT_WORD:
4934           for (fi = min;; fi++)
4935             {
4936             int category;
4937             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4938             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4939             if (fi >= max) RRETURN(MATCH_NOMATCH);
4940             if (eptr >= md->end_subject)
4941               {
4942               SCHECK_PARTIAL();
4943               RRETURN(MATCH_NOMATCH);
4944               }
4945             GETCHARINCTEST(c, eptr);
4946             category = UCD_CATEGORY(c);
4947             if ((category == ucp_L ||
4948                  category == ucp_N ||
4949                  c == CHAR_UNDERSCORE)
4950                    == prop_fail_result)
4951               RRETURN(MATCH_NOMATCH);
4952             }
4953           /* Control never gets here */
4954
4955           case PT_CLIST:
4956           for (fi = min;; fi++)
4957             {
4958             const pcre_uint32 *cp;
4959             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
4960             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4961             if (fi >= max) RRETURN(MATCH_NOMATCH);
4962             if (eptr >= md->end_subject)
4963               {
4964               SCHECK_PARTIAL();
4965               RRETURN(MATCH_NOMATCH);
4966               }
4967             GETCHARINCTEST(c, eptr);
4968             cp = PRIV(ucd_caseless_sets) + prop_value;
4969             for (;;)
4970               {
4971               if (c < *cp)
4972                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4973               if (c == *cp++)
4974                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4975               }
4976             }
4977           /* Control never gets here */
4978
4979           /* This should never occur */
4980           default:
4981           RRETURN(PCRE_ERROR_INTERNAL);
4982           }
4983         }
4984
4985       /* Match extended Unicode sequences. We will get here only if the
4986       support is in the binary; otherwise a compile-time error occurs. */
4987
4988       else if (ctype == OP_EXTUNI)
4989         {
4990         for (fi = min;; fi++)
4991           {
4992           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
4993           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4994           if (fi >= max) RRETURN(MATCH_NOMATCH);
4995           if (eptr >= md->end_subject)
4996             {
4997             SCHECK_PARTIAL();
4998             RRETURN(MATCH_NOMATCH);
4999             }
5000           else
5001             {
5002             int lgb, rgb;
5003             GETCHARINCTEST(c, eptr);
5004             lgb = UCD_GRAPHBREAK(c);
5005             while (eptr < md->end_subject)
5006               {
5007               int len = 1;
5008               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5009               rgb = UCD_GRAPHBREAK(c);
5010               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5011               lgb = rgb;
5012               eptr += len;
5013               }
5014             }
5015           CHECK_PARTIAL();
5016           }
5017         }
5018       else
5019 #endif     /* SUPPORT_UCP */
5020
5021 #ifdef SUPPORT_UTF
5022       if (utf)
5023         {
5024         for (fi = min;; fi++)
5025           {
5026           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5027           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5028           if (fi >= max) RRETURN(MATCH_NOMATCH);
5029           if (eptr >= md->end_subject)
5030             {
5031             SCHECK_PARTIAL();
5032             RRETURN(MATCH_NOMATCH);
5033             }
5034           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5035             RRETURN(MATCH_NOMATCH);
5036           GETCHARINC(c, eptr);
5037           switch(ctype)
5038             {
5039             case OP_ANY:               /* This is the non-NL case */
5040             if (md->partial != 0 &&    /* Take care with CRLF partial */
5041                 eptr >= md->end_subject &&
5042                 NLBLOCK->nltype == NLTYPE_FIXED &&
5043                 NLBLOCK->nllen == 2 &&
5044                 c == NLBLOCK->nl[0])
5045               {
5046               md->hitend = TRUE;
5047               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5048               }
5049             break;
5050
5051             case OP_ALLANY:
5052             case OP_ANYBYTE:
5053             break;
5054
5055             case OP_ANYNL:
5056             switch(c)
5057               {
5058               default: RRETURN(MATCH_NOMATCH);
5059               case CHAR_CR:
5060               if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
5061               break;
5062
5063               case CHAR_LF:
5064               break;
5065
5066               case CHAR_VT:
5067               case CHAR_FF:
5068               case CHAR_NEL:
5069 #ifndef EBCDIC
5070               case 0x2028:
5071               case 0x2029:
5072 #endif  /* Not EBCDIC */
5073               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5074               break;
5075               }
5076             break;
5077
5078             case OP_NOT_HSPACE:
5079             switch(c)
5080               {
5081               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5082               default: break;
5083               }
5084             break;
5085
5086             case OP_HSPACE:
5087             switch(c)
5088               {
5089               HSPACE_CASES: break;
5090               default: RRETURN(MATCH_NOMATCH);
5091               }
5092             break;
5093
5094             case OP_NOT_VSPACE:
5095             switch(c)
5096               {
5097               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5098               default: break;
5099               }
5100             break;
5101
5102             case OP_VSPACE:
5103             switch(c)
5104               {
5105               VSPACE_CASES: break;
5106               default: RRETURN(MATCH_NOMATCH);
5107               }
5108             break;
5109
5110             case OP_NOT_DIGIT:
5111             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5112               RRETURN(MATCH_NOMATCH);
5113             break;
5114
5115             case OP_DIGIT:
5116             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5117               RRETURN(MATCH_NOMATCH);
5118             break;
5119
5120             case OP_NOT_WHITESPACE:
5121             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5122               RRETURN(MATCH_NOMATCH);
5123             break;
5124
5125             case OP_WHITESPACE:
5126             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5127               RRETURN(MATCH_NOMATCH);
5128             break;
5129
5130             case OP_NOT_WORDCHAR:
5131             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5132               RRETURN(MATCH_NOMATCH);
5133             break;
5134
5135             case OP_WORDCHAR:
5136             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5137               RRETURN(MATCH_NOMATCH);
5138             break;
5139
5140             default:
5141             RRETURN(PCRE_ERROR_INTERNAL);
5142             }
5143           }
5144         }
5145       else
5146 #endif
5147       /* Not UTF mode */
5148         {
5149         for (fi = min;; fi++)
5150           {
5151           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5152           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5153           if (fi >= max) RRETURN(MATCH_NOMATCH);
5154           if (eptr >= md->end_subject)
5155             {
5156             SCHECK_PARTIAL();
5157             RRETURN(MATCH_NOMATCH);
5158             }
5159           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5160             RRETURN(MATCH_NOMATCH);
5161           c = *eptr++;
5162           switch(ctype)
5163             {
5164             case OP_ANY:               /* This is the non-NL case */
5165             if (md->partial != 0 &&    /* Take care with CRLF partial */
5166                 eptr >= md->end_subject &&
5167                 NLBLOCK->nltype == NLTYPE_FIXED &&
5168                 NLBLOCK->nllen == 2 &&
5169                 c == NLBLOCK->nl[0])
5170               {
5171               md->hitend = TRUE;
5172               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5173               }
5174             break;
5175
5176             case OP_ALLANY:
5177             case OP_ANYBYTE:
5178             break;
5179
5180             case OP_ANYNL:
5181             switch(c)
5182               {
5183               default: RRETURN(MATCH_NOMATCH);
5184               case CHAR_CR:
5185               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5186               break;
5187
5188               case CHAR_LF:
5189               break;
5190
5191               case CHAR_VT:
5192               case CHAR_FF:
5193               case CHAR_NEL:
5194 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5195               case 0x2028:
5196               case 0x2029:
5197 #endif
5198               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5199               break;
5200               }
5201             break;
5202
5203             case OP_NOT_HSPACE:
5204             switch(c)
5205               {
5206               default: break;
5207               HSPACE_BYTE_CASES:
5208 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5209               HSPACE_MULTIBYTE_CASES:
5210 #endif
5211               RRETURN(MATCH_NOMATCH);
5212               }
5213             break;
5214
5215             case OP_HSPACE:
5216             switch(c)
5217               {
5218               default: RRETURN(MATCH_NOMATCH);
5219               HSPACE_BYTE_CASES:
5220 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5221               HSPACE_MULTIBYTE_CASES:
5222 #endif
5223               break;
5224               }
5225             break;
5226
5227             case OP_NOT_VSPACE:
5228             switch(c)
5229               {
5230               default: break;
5231               VSPACE_BYTE_CASES:
5232 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5233               VSPACE_MULTIBYTE_CASES:
5234 #endif
5235               RRETURN(MATCH_NOMATCH);
5236               }
5237             break;
5238
5239             case OP_VSPACE:
5240             switch(c)
5241               {
5242               default: RRETURN(MATCH_NOMATCH);
5243               VSPACE_BYTE_CASES:
5244 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5245               VSPACE_MULTIBYTE_CASES:
5246 #endif
5247               break;
5248               }
5249             break;
5250
5251             case OP_NOT_DIGIT:
5252             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5253             break;
5254
5255             case OP_DIGIT:
5256             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5257             break;
5258
5259             case OP_NOT_WHITESPACE:
5260             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5261             break;
5262
5263             case OP_WHITESPACE:
5264             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5265             break;
5266
5267             case OP_NOT_WORDCHAR:
5268             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5269             break;
5270
5271             case OP_WORDCHAR:
5272             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5273             break;
5274
5275             default:
5276             RRETURN(PCRE_ERROR_INTERNAL);
5277             }
5278           }
5279         }
5280       /* Control never gets here */
5281       }
5282
5283     /* If maximizing, it is worth using inline code for speed, doing the type
5284     test once at the start (i.e. keep it out of the loop). Again, keep the
5285     UTF-8 and UCP stuff separate. */
5286
5287     else
5288       {
5289       pp = eptr;  /* Remember where we started */
5290
5291 #ifdef SUPPORT_UCP
5292       if (prop_type >= 0)
5293         {
5294         switch(prop_type)
5295           {
5296           case PT_ANY:
5297           for (i = min; i < max; i++)
5298             {
5299             int len = 1;
5300             if (eptr >= md->end_subject)
5301               {
5302               SCHECK_PARTIAL();
5303               break;
5304               }
5305             GETCHARLENTEST(c, eptr, len);
5306             if (prop_fail_result) break;
5307             eptr+= len;
5308             }
5309           break;
5310
5311           case PT_LAMP:
5312           for (i = min; i < max; i++)
5313             {
5314             int chartype;
5315             int len = 1;
5316             if (eptr >= md->end_subject)
5317               {
5318               SCHECK_PARTIAL();
5319               break;
5320               }
5321             GETCHARLENTEST(c, eptr, len);
5322             chartype = UCD_CHARTYPE(c);
5323             if ((chartype == ucp_Lu ||
5324                  chartype == ucp_Ll ||
5325                  chartype == ucp_Lt) == prop_fail_result)
5326               break;
5327             eptr+= len;
5328             }
5329           break;
5330
5331           case PT_GC:
5332           for (i = min; i < max; i++)
5333             {
5334             int len = 1;
5335             if (eptr >= md->end_subject)
5336               {
5337               SCHECK_PARTIAL();
5338               break;
5339               }
5340             GETCHARLENTEST(c, eptr, len);
5341             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5342             eptr+= len;
5343             }
5344           break;
5345
5346           case PT_PC:
5347           for (i = min; i < max; i++)
5348             {
5349             int len = 1;
5350             if (eptr >= md->end_subject)
5351               {
5352               SCHECK_PARTIAL();
5353               break;
5354               }
5355             GETCHARLENTEST(c, eptr, len);
5356             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5357             eptr+= len;
5358             }
5359           break;
5360
5361           case PT_SC:
5362           for (i = min; i < max; i++)
5363             {
5364             int len = 1;
5365             if (eptr >= md->end_subject)
5366               {
5367               SCHECK_PARTIAL();
5368               break;
5369               }
5370             GETCHARLENTEST(c, eptr, len);
5371             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5372             eptr+= len;
5373             }
5374           break;
5375
5376           case PT_ALNUM:
5377           for (i = min; i < max; i++)
5378             {
5379             int category;
5380             int len = 1;
5381             if (eptr >= md->end_subject)
5382               {
5383               SCHECK_PARTIAL();
5384               break;
5385               }
5386             GETCHARLENTEST(c, eptr, len);
5387             category = UCD_CATEGORY(c);
5388             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5389               break;
5390             eptr+= len;
5391             }
5392           break;
5393
5394           case PT_SPACE:    /* Perl space */
5395           for (i = min; i < max; i++)
5396             {
5397             int len = 1;
5398             if (eptr >= md->end_subject)
5399               {
5400               SCHECK_PARTIAL();
5401               break;
5402               }
5403             GETCHARLENTEST(c, eptr, len);
5404             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5405                  c == CHAR_FF || c == CHAR_CR)
5406                  == prop_fail_result)
5407               break;
5408             eptr+= len;
5409             }
5410           break;
5411
5412           case PT_PXSPACE:  /* POSIX space */
5413           for (i = min; i < max; i++)
5414             {
5415             int len = 1;
5416             if (eptr >= md->end_subject)
5417               {
5418               SCHECK_PARTIAL();
5419               break;
5420               }
5421             GETCHARLENTEST(c, eptr, len);
5422             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5423                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5424                  == prop_fail_result)
5425               break;
5426             eptr+= len;
5427             }
5428           break;
5429
5430           case PT_WORD:
5431           for (i = min; i < max; i++)
5432             {
5433             int category;
5434             int len = 1;
5435             if (eptr >= md->end_subject)
5436               {
5437               SCHECK_PARTIAL();
5438               break;
5439               }
5440             GETCHARLENTEST(c, eptr, len);
5441             category = UCD_CATEGORY(c);
5442             if ((category == ucp_L || category == ucp_N ||
5443                  c == CHAR_UNDERSCORE) == prop_fail_result)
5444               break;
5445             eptr+= len;
5446             }
5447           break;
5448
5449           case PT_CLIST:
5450           for (i = min; i < max; i++)
5451             {
5452             const pcre_uint32 *cp;
5453             int len = 1;
5454             if (eptr >= md->end_subject)
5455               {
5456               SCHECK_PARTIAL();
5457               break;
5458               }
5459             GETCHARLENTEST(c, eptr, len);
5460             cp = PRIV(ucd_caseless_sets) + prop_value;
5461             for (;;)
5462               {
5463               if (c < *cp)
5464                 { if (prop_fail_result) break; else goto GOT_MAX; }
5465               if (c == *cp++)
5466                 { if (prop_fail_result) goto GOT_MAX; else break; }
5467               }
5468             eptr += len;
5469             }
5470           GOT_MAX:
5471           break;
5472
5473           default:
5474           RRETURN(PCRE_ERROR_INTERNAL);
5475           }
5476
5477         /* eptr is now past the end of the maximum run */
5478
5479         if (possessive) continue;
5480         for(;;)
5481           {
5482           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5483           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5484           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5485           if (utf) BACKCHAR(eptr);
5486           }
5487         }
5488
5489       /* Match extended Unicode sequences. We will get here only if the
5490       support is in the binary; otherwise a compile-time error occurs. */
5491
5492       else if (ctype == OP_EXTUNI)
5493         {
5494         for (i = min; i < max; i++)
5495           {
5496           if (eptr >= md->end_subject)
5497             {
5498             SCHECK_PARTIAL();
5499             break;
5500             }
5501           else
5502             {
5503             int lgb, rgb;
5504             GETCHARINCTEST(c, eptr);
5505             lgb = UCD_GRAPHBREAK(c);
5506             while (eptr < md->end_subject)
5507               {
5508               int len = 1;
5509               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5510               rgb = UCD_GRAPHBREAK(c);
5511               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5512               lgb = rgb;
5513               eptr += len;
5514               }
5515             }
5516           CHECK_PARTIAL();
5517           }
5518
5519         /* eptr is now past the end of the maximum run */
5520
5521         if (possessive) continue;
5522
5523         for(;;)
5524           {
5525           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5526           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5527           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5528           for (;;)                        /* Move back over one extended */
5529             {
5530             if (!utf) c = *eptr; else
5531               {
5532               BACKCHAR(eptr);
5533               GETCHAR(c, eptr);
5534               }
5535             if (UCD_CATEGORY(c) != ucp_M) break;
5536             eptr--;
5537             }
5538           }
5539         }
5540
5541       else
5542 #endif   /* SUPPORT_UCP */
5543
5544 #ifdef SUPPORT_UTF
5545       if (utf)
5546         {
5547         switch(ctype)
5548           {
5549           case OP_ANY:
5550           if (max < INT_MAX)
5551             {
5552             for (i = min; i < max; i++)
5553               {
5554               if (eptr >= md->end_subject)
5555                 {
5556                 SCHECK_PARTIAL();
5557                 break;
5558                 }
5559               if (IS_NEWLINE(eptr)) break;
5560               if (md->partial != 0 &&    /* Take care with CRLF partial */
5561                   eptr + 1 >= md->end_subject &&
5562                   NLBLOCK->nltype == NLTYPE_FIXED &&
5563                   NLBLOCK->nllen == 2 &&
5564                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
5565                 {
5566                 md->hitend = TRUE;
5567                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5568                 }
5569               eptr++;
5570               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5571               }
5572             }
5573
5574           /* Handle unlimited UTF-8 repeat */
5575
5576           else
5577             {
5578             for (i = min; i < max; i++)
5579               {
5580               if (eptr >= md->end_subject)
5581                 {
5582                 SCHECK_PARTIAL();
5583                 break;
5584                 }
5585               if (IS_NEWLINE(eptr)) break;
5586               if (md->partial != 0 &&    /* Take care with CRLF partial */
5587                   eptr + 1 >= md->end_subject &&
5588                   NLBLOCK->nltype == NLTYPE_FIXED &&
5589                   NLBLOCK->nllen == 2 &&
5590                   RAWUCHAR(eptr) == NLBLOCK->nl[0])
5591                 {
5592                 md->hitend = TRUE;
5593                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5594                 }
5595               eptr++;
5596               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5597               }
5598             }
5599           break;
5600
5601           case OP_ALLANY:
5602           if (max < INT_MAX)
5603             {
5604             for (i = min; i < max; i++)
5605               {
5606               if (eptr >= md->end_subject)
5607                 {
5608                 SCHECK_PARTIAL();
5609                 break;
5610                 }
5611               eptr++;
5612               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5613               }
5614             }
5615           else
5616             {
5617             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5618             SCHECK_PARTIAL();
5619             }
5620           break;
5621
5622           /* The byte case is the same as non-UTF8 */
5623
5624           case OP_ANYBYTE:
5625           c = max - min;
5626           if (c > (unsigned int)(md->end_subject - eptr))
5627             {
5628             eptr = md->end_subject;
5629             SCHECK_PARTIAL();
5630             }
5631           else eptr += c;
5632           break;
5633
5634           case OP_ANYNL:
5635           for (i = min; i < max; i++)
5636             {
5637             int len = 1;
5638             if (eptr >= md->end_subject)
5639               {
5640               SCHECK_PARTIAL();
5641               break;
5642               }
5643             GETCHARLEN(c, eptr, len);
5644             if (c == CHAR_CR)
5645               {
5646               if (++eptr >= md->end_subject) break;
5647               if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
5648               }
5649             else
5650               {
5651               if (c != CHAR_LF &&
5652                   (md->bsr_anycrlf ||
5653                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5654 #ifndef EBCDIC
5655                     && c != 0x2028 && c != 0x2029
5656 #endif  /* Not EBCDIC */
5657                     )))
5658                 break;
5659               eptr += len;
5660               }
5661             }
5662           break;
5663
5664           case OP_NOT_HSPACE:
5665           case OP_HSPACE:
5666           for (i = min; i < max; i++)
5667             {
5668             BOOL gotspace;
5669             int len = 1;
5670             if (eptr >= md->end_subject)
5671               {
5672               SCHECK_PARTIAL();
5673               break;
5674               }
5675             GETCHARLEN(c, eptr, len);
5676             switch(c)
5677               {
5678               HSPACE_CASES: gotspace = TRUE; break;
5679               default: gotspace = FALSE; break;
5680               }
5681             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5682             eptr += len;
5683             }
5684           break;
5685
5686           case OP_NOT_VSPACE:
5687           case OP_VSPACE:
5688           for (i = min; i < max; i++)
5689             {
5690             BOOL gotspace;
5691             int len = 1;
5692             if (eptr >= md->end_subject)
5693               {
5694               SCHECK_PARTIAL();
5695               break;
5696               }
5697             GETCHARLEN(c, eptr, len);
5698             switch(c)
5699               {
5700               VSPACE_CASES: gotspace = TRUE; break;
5701               default: gotspace = FALSE; break;
5702               }
5703             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5704             eptr += len;
5705             }
5706           break;
5707
5708           case OP_NOT_DIGIT:
5709           for (i = min; i < max; i++)
5710             {
5711             int len = 1;
5712             if (eptr >= md->end_subject)
5713               {
5714               SCHECK_PARTIAL();
5715               break;
5716               }
5717             GETCHARLEN(c, eptr, len);
5718             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5719             eptr+= len;
5720             }
5721           break;
5722
5723           case OP_DIGIT:
5724           for (i = min; i < max; i++)
5725             {
5726             int len = 1;
5727             if (eptr >= md->end_subject)
5728               {
5729               SCHECK_PARTIAL();
5730               break;
5731               }
5732             GETCHARLEN(c, eptr, len);
5733             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5734             eptr+= len;
5735             }
5736           break;
5737
5738           case OP_NOT_WHITESPACE:
5739           for (i = min; i < max; i++)
5740             {
5741             int len = 1;
5742             if (eptr >= md->end_subject)
5743               {
5744               SCHECK_PARTIAL();
5745               break;
5746               }
5747             GETCHARLEN(c, eptr, len);
5748             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5749             eptr+= len;
5750             }
5751           break;
5752
5753           case OP_WHITESPACE:
5754           for (i = min; i < max; i++)
5755             {
5756             int len = 1;
5757             if (eptr >= md->end_subject)
5758               {
5759               SCHECK_PARTIAL();
5760               break;
5761               }
5762             GETCHARLEN(c, eptr, len);
5763             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5764             eptr+= len;
5765             }
5766           break;
5767
5768           case OP_NOT_WORDCHAR:
5769           for (i = min; i < max; i++)
5770             {
5771             int len = 1;
5772             if (eptr >= md->end_subject)
5773               {
5774               SCHECK_PARTIAL();
5775               break;
5776               }
5777             GETCHARLEN(c, eptr, len);
5778             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5779             eptr+= len;
5780             }
5781           break;
5782
5783           case OP_WORDCHAR:
5784           for (i = min; i < max; i++)
5785             {
5786             int len = 1;
5787             if (eptr >= md->end_subject)
5788               {
5789               SCHECK_PARTIAL();
5790               break;
5791               }
5792             GETCHARLEN(c, eptr, len);
5793             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5794             eptr+= len;
5795             }
5796           break;
5797
5798           default:
5799           RRETURN(PCRE_ERROR_INTERNAL);
5800           }
5801
5802         /* eptr is now past the end of the maximum run. If possessive, we are
5803         done (no backing up). Otherwise, match at this position; anything other
5804         than no match is immediately returned. For nomatch, back up one
5805         character, unless we are matching \R and the last thing matched was
5806         \r\n, in which case, back up two bytes. */
5807
5808         if (possessive) continue;
5809         for(;;)
5810           {
5811           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5812           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5813           if (eptr-- == pp) break;        /* Stop if tried at original pos */
5814           BACKCHAR(eptr);
5815           if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
5816               RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
5817           }
5818         }
5819       else
5820 #endif  /* SUPPORT_UTF */
5821       /* Not UTF mode */
5822         {
5823         switch(ctype)
5824           {
5825           case OP_ANY:
5826           for (i = min; i < max; i++)
5827             {
5828             if (eptr >= md->end_subject)
5829               {
5830               SCHECK_PARTIAL();
5831               break;
5832               }
5833             if (IS_NEWLINE(eptr)) break;
5834             if (md->partial != 0 &&    /* Take care with CRLF partial */
5835                 eptr + 1 >= md->end_subject &&
5836                 NLBLOCK->nltype == NLTYPE_FIXED &&
5837                 NLBLOCK->nllen == 2 &&
5838                 *eptr == NLBLOCK->nl[0])
5839               {
5840               md->hitend = TRUE;
5841               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5842               }
5843             eptr++;
5844             }
5845           break;
5846
5847           case OP_ALLANY:
5848           case OP_ANYBYTE:
5849           c = max - min;
5850           if (c > (unsigned int)(md->end_subject - eptr))
5851             {
5852             eptr = md->end_subject;
5853             SCHECK_PARTIAL();
5854             }
5855           else eptr += c;
5856           break;
5857
5858           case OP_ANYNL:
5859           for (i = min; i < max; i++)
5860             {
5861             if (eptr >= md->end_subject)
5862               {
5863               SCHECK_PARTIAL();
5864               break;
5865               }
5866             c = *eptr;
5867             if (c == CHAR_CR)
5868               {
5869               if (++eptr >= md->end_subject) break;
5870               if (*eptr == CHAR_LF) eptr++;
5871               }
5872             else
5873               {
5874               if (c != CHAR_LF && (md->bsr_anycrlf ||
5875                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5876 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5877                  && c != 0x2028 && c != 0x2029
5878 #endif
5879                  ))) break;
5880               eptr++;
5881               }
5882             }
5883           break;
5884
5885           case OP_NOT_HSPACE:
5886           for (i = min; i < max; i++)
5887             {
5888             if (eptr >= md->end_subject)
5889               {
5890               SCHECK_PARTIAL();
5891               break;
5892               }
5893             switch(*eptr)
5894               {
5895               default: eptr++; break;
5896               HSPACE_BYTE_CASES:
5897 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5898               HSPACE_MULTIBYTE_CASES:
5899 #endif
5900               goto ENDLOOP00;
5901               }
5902             }
5903           ENDLOOP00:
5904           break;
5905
5906           case OP_HSPACE:
5907           for (i = min; i < max; i++)
5908             {
5909             if (eptr >= md->end_subject)
5910               {
5911               SCHECK_PARTIAL();
5912               break;
5913               }
5914             switch(*eptr)
5915               {
5916               default: goto ENDLOOP01;
5917               HSPACE_BYTE_CASES:
5918 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5919               HSPACE_MULTIBYTE_CASES:
5920 #endif
5921               eptr++; break;
5922               }
5923             }
5924           ENDLOOP01:
5925           break;
5926
5927           case OP_NOT_VSPACE:
5928           for (i = min; i < max; i++)
5929             {
5930             if (eptr >= md->end_subject)
5931               {
5932               SCHECK_PARTIAL();
5933               break;
5934               }
5935             switch(*eptr)
5936               {
5937               default: eptr++; break;
5938               VSPACE_BYTE_CASES:
5939 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5940               VSPACE_MULTIBYTE_CASES:
5941 #endif
5942               goto ENDLOOP02;
5943               }
5944             }
5945           ENDLOOP02:
5946           break;
5947
5948           case OP_VSPACE:
5949           for (i = min; i < max; i++)
5950             {
5951             if (eptr >= md->end_subject)
5952               {
5953               SCHECK_PARTIAL();
5954               break;
5955               }
5956             switch(*eptr)
5957               {
5958               default: goto ENDLOOP03;
5959               VSPACE_BYTE_CASES:
5960 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5961               VSPACE_MULTIBYTE_CASES:
5962 #endif
5963               eptr++; break;
5964               }
5965             }
5966           ENDLOOP03:
5967           break;
5968
5969           case OP_NOT_DIGIT:
5970           for (i = min; i < max; i++)
5971             {
5972             if (eptr >= md->end_subject)
5973               {
5974               SCHECK_PARTIAL();
5975               break;
5976               }
5977             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
5978             eptr++;
5979             }
5980           break;
5981
5982           case OP_DIGIT:
5983           for (i = min; i < max; i++)
5984             {
5985             if (eptr >= md->end_subject)
5986               {
5987               SCHECK_PARTIAL();
5988               break;
5989               }
5990             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
5991             eptr++;
5992             }
5993           break;
5994
5995           case OP_NOT_WHITESPACE:
5996           for (i = min; i < max; i++)
5997             {
5998             if (eptr >= md->end_subject)
5999               {
6000               SCHECK_PARTIAL();
6001               break;
6002               }
6003             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6004             eptr++;
6005             }
6006           break;
6007
6008           case OP_WHITESPACE:
6009           for (i = min; i < max; i++)
6010             {
6011             if (eptr >= md->end_subject)
6012               {
6013               SCHECK_PARTIAL();
6014               break;
6015               }
6016             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6017             eptr++;
6018             }
6019           break;
6020
6021           case OP_NOT_WORDCHAR:
6022           for (i = min; i < max; i++)
6023             {
6024             if (eptr >= md->end_subject)
6025               {
6026               SCHECK_PARTIAL();
6027               break;
6028               }
6029             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6030             eptr++;
6031             }
6032           break;
6033
6034           case OP_WORDCHAR:
6035           for (i = min; i < max; i++)
6036             {
6037             if (eptr >= md->end_subject)
6038               {
6039               SCHECK_PARTIAL();
6040               break;
6041               }
6042             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6043             eptr++;
6044             }
6045           break;
6046
6047           default:
6048           RRETURN(PCRE_ERROR_INTERNAL);
6049           }
6050
6051         /* eptr is now past the end of the maximum run. If possessive, we are
6052         done (no backing up). Otherwise, match at this position; anything other
6053         than no match is immediately returned. For nomatch, back up one
6054         character (byte), unless we are matching \R and the last thing matched
6055         was \r\n, in which case, back up two bytes. */
6056
6057         if (possessive) continue;
6058         while (eptr >= pp)
6059           {
6060           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6061           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6062           eptr--;
6063           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6064               eptr[-1] == CHAR_CR) eptr--;
6065           }
6066         }
6067
6068       /* Get here if we can't make it match with any permitted repetitions */
6069
6070       RRETURN(MATCH_NOMATCH);
6071       }
6072     /* Control never gets here */
6073
6074     /* There's been some horrible disaster. Arrival here can only mean there is
6075     something seriously wrong in the code above or the OP_xxx definitions. */
6076
6077     default:
6078     DPRINTF(("Unknown opcode %d\n", *ecode));
6079     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6080     }
6081
6082   /* Do not stick any code in here without much thought; it is assumed
6083   that "continue" in the code above comes out to here to repeat the main
6084   loop. */
6085
6086   }             /* End of main loop */
6087 /* Control never reaches here */
6088
6089
6090 /* When compiling to use the heap rather than the stack for recursive calls to
6091 match(), the RRETURN() macro jumps here. The number that is saved in
6092 frame->Xwhere indicates which label we actually want to return to. */
6093
6094 #ifdef NO_RECURSE
6095 #define LBL(val) case val: goto L_RM##val;
6096 HEAP_RETURN:
6097 switch (frame->Xwhere)
6098   {
6099   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6100   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6101   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6102   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6103   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6104   LBL(65) LBL(66)
6105 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6106   LBL(21)
6107 #endif
6108 #ifdef SUPPORT_UTF
6109   LBL(16) LBL(18) LBL(20)
6110   LBL(22) LBL(23) LBL(28) LBL(30)
6111   LBL(32) LBL(34) LBL(42) LBL(46)
6112 #ifdef SUPPORT_UCP
6113   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6114   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6115 #endif  /* SUPPORT_UCP */
6116 #endif  /* SUPPORT_UTF */
6117   default:
6118   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6119   return PCRE_ERROR_INTERNAL;
6120   }
6121 #undef LBL
6122 #endif  /* NO_RECURSE */
6123 }
6124
6125
6126 /***************************************************************************
6127 ****************************************************************************
6128                    RECURSION IN THE match() FUNCTION
6129
6130 Undefine all the macros that were defined above to handle this. */
6131
6132 #ifdef NO_RECURSE
6133 #undef eptr
6134 #undef ecode
6135 #undef mstart
6136 #undef offset_top
6137 #undef eptrb
6138 #undef flags
6139
6140 #undef callpat
6141 #undef charptr
6142 #undef data
6143 #undef next
6144 #undef pp
6145 #undef prev
6146 #undef saved_eptr
6147
6148 #undef new_recursive
6149
6150 #undef cur_is_word
6151 #undef condition
6152 #undef prev_is_word
6153
6154 #undef ctype
6155 #undef length
6156 #undef max
6157 #undef min
6158 #undef number
6159 #undef offset
6160 #undef op
6161 #undef save_capture_last
6162 #undef save_offset1
6163 #undef save_offset2
6164 #undef save_offset3
6165 #undef stacksave
6166
6167 #undef newptrb
6168
6169 #endif
6170
6171 /* These two are defined as macros in both cases */
6172
6173 #undef fc
6174 #undef fi
6175
6176 /***************************************************************************
6177 ***************************************************************************/
6178
6179
6180 #ifdef NO_RECURSE
6181 /*************************************************
6182 *          Release allocated heap frames         *
6183 *************************************************/
6184
6185 /* This function releases all the allocated frames. The base frame is on the
6186 machine stack, and so must not be freed.
6187
6188 Argument: the address of the base frame
6189 Returns:  nothing
6190 */
6191
6192 static void   /* Releases every heap frame chained after frame_base; frame_base itself is never freed */
6193 release_match_heapframes (heapframe *frame_base)
6194 {
6195 heapframe *nextframe = frame_base->Xnextframe;   /* Start at the successor, skipping the base frame */
6196 while (nextframe != NULL)                        /* The chain ends at a NULL Xnextframe link */
6197   {
6198   heapframe *oldframe = nextframe;
6199   nextframe = nextframe->Xnextframe;             /* Read the link before the frame is released */
6200   (PUBL(stack_free))(oldframe);                  /* Release the frame through PUBL(stack_free) */
6201   }
6202 }
6203 #endif
6204
6205
6206 /*************************************************
6207 *         Execute a Regular Expression           *
6208 *************************************************/
6209
6210 /* This function applies a compiled re to a subject string and picks out
6211 portions of the string if it matches. Two elements in the vector are set for
6212 each substring: the offsets to the start and end of the substring.
6213
6214 Arguments:
6215   argument_re     points to the compiled expression
6216   extra_data      points to extra data or is NULL
6217   subject         points to the subject string
6218   length          length of subject string (may contain binary zeros)
6219   start_offset    where to start in the subject string
6220   options         option bits
6221   offsets         points to a vector of ints to be filled in with offsets
6222   offsetcount     the number of elements in the vector
6223
6224 Returns:          > 0 => success; value is the number of elements filled in
6225                   = 0 => success, but offsets is not big enough
6226                    -1 => failed to match
6227                  < -1 => some kind of unexpected problem
6228 */
6229
6230 #if defined COMPILE_PCRE8
6231 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6232 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6233   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6234   int offsetcount)
6235 #elif defined COMPILE_PCRE16
6236 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6237 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6238   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6239   int offsetcount)
6240 #elif defined COMPILE_PCRE32
6241 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6242 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6243   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6244   int offsetcount)
6245 #endif
6246 {
6247 int rc, ocount, arg_offset_max;
6248 int newline;
6249 BOOL using_temporary_offsets = FALSE;
6250 BOOL anchored;
6251 BOOL startline;
6252 BOOL firstline;
6253 BOOL utf;
6254 BOOL has_first_char = FALSE;
6255 BOOL has_req_char = FALSE;
6256 pcre_uchar first_char = 0;
6257 pcre_uchar first_char2 = 0;
6258 pcre_uchar req_char = 0;
6259 pcre_uchar req_char2 = 0;
6260 match_data match_block;
6261 match_data *md = &match_block;
6262 const pcre_uint8 *tables;
6263 const pcre_uint8 *start_bits = NULL;
6264 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6265 PCRE_PUCHAR end_subject;
6266 PCRE_PUCHAR start_partial = NULL;
6267 PCRE_PUCHAR req_char_ptr = start_match - 1;
6268
6269 const pcre_study_data *study;
6270 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6271
6272 #ifdef NO_RECURSE
6273 heapframe frame_zero;
6274 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6275 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6276 md->match_frames_base = &frame_zero;
6277 #endif
6278
6279 /* Check for the special magic call that measures the size of the stack used
6280 per recursive call of match(). Without the funny casting for sizeof, a Windows
6281 compiler gave this error: "unary minus operator applied to unsigned type,
6282 result still unsigned". Hopefully the cast fixes that. */
6283
6284 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6285     start_offset == -999)
6286 #ifdef NO_RECURSE
6287   return -((int)sizeof(heapframe));
6288 #else
6289   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6290 #endif
6291
6292 /* Plausibility checks */
6293
6294 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6295 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6296   return PCRE_ERROR_NULL;
6297 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6298 if (length < 0) return PCRE_ERROR_BADLENGTH;
6299 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6300
6301 /* Check that the first field in the block is the magic number. If it is not,
6302 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6303 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6304 means that the pattern is likely compiled with different endianness. */
6305
6306 if (re->magic_number != MAGIC_NUMBER)
6307   return re->magic_number == REVERSED_MAGIC_NUMBER?
6308     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6309 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6310
6311 /* These two settings are used in the code for checking a UTF-8 string that
6312 follows immediately afterwards. Other values in the md block are used only
6313 during "normal" pcre_exec() processing, not when the JIT support is in use,
6314 so they are set up later. */
6315
6316 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6317 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6318 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6319               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6320
6321 /* Check a UTF-8 string if required. Pass back the character offset and error
6322 code for an invalid string if a results vector is available. */
6323
6324 #ifdef SUPPORT_UTF
6325 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6326   {
6327   int erroroffset;
6328   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6329   if (errorcode != 0)
6330     {
6331     if (offsetcount >= 2)
6332       {
6333       offsets[0] = erroroffset;
6334       offsets[1] = errorcode;
6335       }
6336 #if defined COMPILE_PCRE8
6337     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6338       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6339 #elif defined COMPILE_PCRE16
6340     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6341       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6342 #elif defined COMPILE_PCRE32
6343     return PCRE_ERROR_BADUTF32;
6344 #endif
6345     }
6346 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6347   /* Check that a start_offset points to the start of a UTF character. */
6348   if (start_offset > 0 && start_offset < length &&
6349       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6350     return PCRE_ERROR_BADUTF8_OFFSET;
6351 #endif
6352   }
6353 #endif
6354
6355 /* If the pattern was successfully studied with JIT support, run the JIT
6356 executable instead of the rest of this function. Most options must be set at
6357 compile time for the JIT code to be usable. Fallback to the normal code path if
6358 an unsupported flag is set. */
6359
6360 #ifdef SUPPORT_JIT
6361 if (extra_data != NULL
6362     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6363                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6364     && extra_data->executable_jit != NULL
6365     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6366   {
6367   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6368        start_offset, options, offsets, offsetcount);
6369
6370   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
6371   matching mode is not compiled. In this case we fall back to the interpreter. */
6372
6373   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6374   }
6375 #endif
6376
6377 /* Carry on with non-JIT matching. This information is for finding all the
6378 numbers associated with a given name, for condition testing. */
6379
6380 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6381 md->name_count = re->name_count;
6382 md->name_entry_size = re->name_entry_size;
6383
6384 /* Fish out the optional data from the extra_data structure, first setting
6385 the default values. */
6386
6387 study = NULL;
6388 md->match_limit = MATCH_LIMIT;
6389 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6390 md->callout_data = NULL;
6391
6392 /* The table pointer is always in native byte order. */
6393
6394 tables = re->tables;
6395
6396 if (extra_data != NULL)
6397   {
6398   register unsigned int flags = extra_data->flags;
6399   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6400     study = (const pcre_study_data *)extra_data->study_data;
6401   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6402     md->match_limit = extra_data->match_limit;
6403   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6404     md->match_limit_recursion = extra_data->match_limit_recursion;
6405   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6406     md->callout_data = extra_data->callout_data;
6407   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6408   }
6409
6410 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6411 is a feature that makes it possible to save compiled regex and re-use them
6412 in other programs later. */
6413
6414 if (tables == NULL) tables = PRIV(default_tables);
6415
6416 /* Set up other data */
6417
6418 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6419 startline = (re->flags & PCRE_STARTLINE) != 0;
6420 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6421
6422 /* The code starts after the real_pcre block and the capture name table. */
6423
6424 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6425   re->name_count * re->name_entry_size;
6426
6427 md->start_subject = (PCRE_PUCHAR)subject;
6428 md->start_offset = start_offset;
6429 md->end_subject = md->start_subject + length;
6430 end_subject = md->end_subject;
6431
6432 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6433 md->use_ucp = (re->options & PCRE_UCP) != 0;
6434 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6435 md->ignore_skip_arg = FALSE;
6436
6437 /* Some options are unpacked into BOOL variables in the hope that testing
6438 them will be faster than individual option bits. */
6439
6440 md->notbol = (options & PCRE_NOTBOL) != 0;
6441 md->noteol = (options & PCRE_NOTEOL) != 0;
6442 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6443 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6444
6445 md->hitend = FALSE;
6446 md->mark = md->nomatch_mark = NULL;     /* In case never set */
6447
6448 md->recursive = NULL;                   /* No recursion at top level */
6449 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6450
6451 md->lcc = tables + lcc_offset;
6452 md->fcc = tables + fcc_offset;
6453 md->ctypes = tables + ctypes_offset;
6454
6455 /* Handle different \R options. */
6456
6457 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6458   {
6459   case 0:
6460   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6461     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6462   else
6463 #ifdef BSR_ANYCRLF
6464   md->bsr_anycrlf = TRUE;
6465 #else
6466   md->bsr_anycrlf = FALSE;
6467 #endif
6468   break;
6469
6470   case PCRE_BSR_ANYCRLF:
6471   md->bsr_anycrlf = TRUE;
6472   break;
6473
6474   case PCRE_BSR_UNICODE:
6475   md->bsr_anycrlf = FALSE;
6476   break;
6477
6478   default: return PCRE_ERROR_BADNEWLINE;
6479   }
6480
6481 /* Handle different types of newline. The three bits give eight cases. If
6482 nothing is set at run time, whatever was used at compile time applies. */
6483
6484 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6485         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6486   {
6487   case 0: newline = NEWLINE; break;   /* Compile-time default */
6488   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6489   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6490   case PCRE_NEWLINE_CR+
6491        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6492   case PCRE_NEWLINE_ANY: newline = -1; break;
6493   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6494   default: return PCRE_ERROR_BADNEWLINE;
6495   }
6496
6497 if (newline == -2)
6498   {
6499   md->nltype = NLTYPE_ANYCRLF;
6500   }
6501 else if (newline < 0)
6502   {
6503   md->nltype = NLTYPE_ANY;
6504   }
6505 else
6506   {
6507   md->nltype = NLTYPE_FIXED;
6508   if (newline > 255)
6509     {
6510     md->nllen = 2;
6511     md->nl[0] = (newline >> 8) & 255;
6512     md->nl[1] = newline & 255;
6513     }
6514   else
6515     {
6516     md->nllen = 1;
6517     md->nl[0] = newline;
6518     }
6519   }
6520
6521 /* Partial matching was originally supported only for a restricted set of
6522 regexes; from release 8.00 there are no restrictions, but the bits are still
6523 defined (though never set). So there's no harm in leaving this code. */
6524
6525 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6526   return PCRE_ERROR_BADPARTIAL;
6527
6528 /* If the expression has got more back references than the offsets supplied can
6529 hold, we get a temporary chunk of working store to use during the matching.
6530 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6531 of 3. */
6532
6533 ocount = offsetcount - (offsetcount % 3);
6534 arg_offset_max = (2*ocount)/3;
6535
6536 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6537   {
6538   ocount = re->top_backref * 3 + 3;
6539   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6540   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6541   using_temporary_offsets = TRUE;
6542   DPRINTF(("Got memory to hold back references\n"));
6543   }
6544 else md->offset_vector = offsets;
6545
6546 md->offset_end = ocount;
6547 md->offset_max = (2*ocount)/3;
6548 md->offset_overflow = FALSE;
6549 md->capture_last = -1;
6550
6551 /* Reset the working variable associated with each extraction. These should
6552 never be used unless previously set, but they get saved and restored, and so we
6553 initialize them to avoid reading uninitialized locations. Also, unset the
6554 offsets for the matched string. This is really just for tidiness with callouts,
6555 in case they inspect these fields. */
6556
6557 if (md->offset_vector != NULL)
6558   {
6559   register int *iptr = md->offset_vector + ocount;
6560   register int *iend = iptr - re->top_bracket;
6561   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6562   while (--iptr >= iend) *iptr = -1;
6563   md->offset_vector[0] = md->offset_vector[1] = -1;
6564   }
6565
6566 /* Set up the first character to match, if available. The first_char value is
6567 never set for an anchored regular expression, but the anchoring may be forced
6568 at run time, so we have to test for anchoring. The first char may be unset for
6569 an unanchored pattern, of course. If there's no first char and the pattern was
6570 studied, there may be a bitmap of possible first characters. */
6571
6572 if (!anchored)
6573   {
6574   if ((re->flags & PCRE_FIRSTSET) != 0)
6575     {
6576     has_first_char = TRUE;
6577     first_char = first_char2 = (pcre_uchar)(re->first_char);
6578     if ((re->flags & PCRE_FCH_CASELESS) != 0)
6579       {
6580       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6581 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6582       if (utf && first_char > 127)
6583         first_char2 = UCD_OTHERCASE(first_char);
6584 #endif
6585       }
6586     }
6587   else
6588     if (!startline && study != NULL &&
6589       (study->flags & PCRE_STUDY_MAPPED) != 0)
6590         start_bits = study->start_bits;
6591   }
6592
6593 /* For anchored or unanchored matches, there may be a "last known required
6594 character" set. */
6595
6596 if ((re->flags & PCRE_REQCHSET) != 0)
6597   {
6598   has_req_char = TRUE;
6599   req_char = req_char2 = (pcre_uchar)(re->req_char);
6600   if ((re->flags & PCRE_RCH_CASELESS) != 0)
6601     {
6602     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6603 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6604     if (utf && req_char > 127)
6605       req_char2 = UCD_OTHERCASE(req_char);
6606 #endif
6607     }
6608   }
6609
6610
6611 /* ==========================================================================*/
6612
6613 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6614 the loop runs just once. */
6615
6616 for(;;)
6617   {
6618   PCRE_PUCHAR save_end_subject = end_subject;
6619   PCRE_PUCHAR new_start_match;
6620
6621   /* If firstline is TRUE, the start of the match is constrained to the first
6622   line of a multiline string. That is, the match must be before or at the first
6623   newline. Implement this by temporarily adjusting end_subject so that we stop
6624   scanning at a newline. If the match fails at the newline, later code breaks
6625   this loop. */
6626
6627   if (firstline)
6628     {
6629     PCRE_PUCHAR t = start_match;
6630 #ifdef SUPPORT_UTF
6631     if (utf)
6632       {
6633       while (t < md->end_subject && !IS_NEWLINE(t))
6634         {
6635         t++;
6636         ACROSSCHAR(t < end_subject, *t, t++);
6637         }
6638       }
6639     else
6640 #endif
6641     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6642     end_subject = t;
6643     }
6644
6645   /* There are some optimizations that avoid running the match if a known
6646   starting point is not found, or if a known later character is not present.
6647   However, there is an option that disables these, for testing and for ensuring
6648   that all callouts do actually occur. The option can be set in the regex by
6649   (*NO_START_OPT) or passed in match-time options. */
6650
6651   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6652     {
6653     /* Advance to a unique first char if there is one. */
6654
6655     if (has_first_char)
6656       {
6657       pcre_uchar smc;
6658
6659       if (first_char != first_char2)
6660         while (start_match < end_subject &&
6661           (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
6662           start_match++;
6663       else
6664         while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
6665           start_match++;
6666       }
6667
6668     /* Or to just after a linebreak for a multiline match */
6669
6670     else if (startline)
6671       {
6672       if (start_match > md->start_subject + start_offset)
6673         {
6674 #ifdef SUPPORT_UTF
6675         if (utf)
6676           {
6677           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6678             {
6679             start_match++;
6680             ACROSSCHAR(start_match < end_subject, *start_match,
6681               start_match++);
6682             }
6683           }
6684         else
6685 #endif
6686         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6687           start_match++;
6688
6689         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6690         and we are now at a LF, advance the match position by one more character.
6691         */
6692
6693         if (start_match[-1] == CHAR_CR &&
6694              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6695              start_match < end_subject &&
6696              RAWUCHARTEST(start_match) == CHAR_NL)
6697           start_match++;
6698         }
6699       }
6700
6701     /* Or to a non-unique first byte after study */
6702
6703     else if (start_bits != NULL)
6704       {
6705       while (start_match < end_subject)
6706         {
6707         register pcre_uint32 c = RAWUCHARTEST(start_match);
6708 #ifndef COMPILE_PCRE8
6709         if (c > 255) c = 255;
6710 #endif
6711         if ((start_bits[c/8] & (1 << (c&7))) == 0)
6712           {
6713           start_match++;
6714 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6715           /* In non 8-bit mode, the iteration will stop for
6716           characters > 255 at the beginning or not stop at all. */
6717           if (utf)
6718             ACROSSCHAR(start_match < end_subject, *start_match,
6719               start_match++);
6720 #endif
6721           }
6722         else break;
6723         }
6724       }
6725     }   /* Starting optimizations */
6726
6727   /* Restore fudged end_subject */
6728
6729   end_subject = save_end_subject;
6730
6731   /* The following two optimizations are disabled for partial matching or if
6732   disabling is explicitly requested. */
6733
6734   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6735     {
6736     /* If the pattern was studied, a minimum subject length may be set. This is
6737     a lower bound; no actual string of that length may actually match the
6738     pattern. Although the value is, strictly, in characters, we treat it as
6739     bytes to avoid spending too much time in this optimization. */
6740
6741     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6742         (pcre_uint32)(end_subject - start_match) < study->minlength)
6743       {
6744       rc = MATCH_NOMATCH;
6745       break;
6746       }
6747
6748     /* If req_char is set, we know that that character must appear in the
6749     subject for the match to succeed. If the first character is set, req_char
6750     must be later in the subject; otherwise the test starts at the match point.
6751     This optimization can save a huge amount of backtracking in patterns with
6752     nested unlimited repeats that aren't going to match. Writing separate code
6753     for cased/caseless versions makes it go faster, as does using an
6754     autoincrement and backing off on a match.
6755
6756     HOWEVER: when the subject string is very, very long, searching to its end
6757     can take a long time, and give bad performance on quite ordinary patterns.
6758     This showed up when somebody was matching something like /^\d+C/ on a
6759     32-megabyte string... so we don't do this when the string is sufficiently
6760     long. */
6761
6762     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6763       {
6764       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6765
6766       /* We don't need to repeat the search if we haven't yet reached the
6767       place we found it at last time. */
6768
6769       if (p > req_char_ptr)
6770         {
6771         if (req_char != req_char2)
6772           {
6773           while (p < end_subject)
6774             {
6775             register pcre_uint32 pp = RAWUCHARINCTEST(p);
6776             if (pp == req_char || pp == req_char2) { p--; break; }
6777             }
6778           }
6779         else
6780           {
6781           while (p < end_subject)
6782             {
6783             if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
6784             }
6785           }
6786
6787         /* If we can't find the required character, break the matching loop,
6788         forcing a match failure. */
6789
6790         if (p >= end_subject)
6791           {
6792           rc = MATCH_NOMATCH;
6793           break;
6794           }
6795
6796         /* If we have found the required character, save the point where we
6797         found it, so that we don't search again next time round the loop if
6798         the start hasn't passed this character yet. */
6799
6800         req_char_ptr = p;
6801         }
6802       }
6803     }
6804
6805 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6806   printf(">>>> Match against: ");
6807   pchars(start_match, end_subject - start_match, TRUE, md);
6808   printf("\n");
6809 #endif
6810
6811   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6812   first starting point for which a partial match was found. */
6813
6814   md->start_match_ptr = start_match;
6815   md->start_used_ptr = start_match;
6816   md->match_call_count = 0;
6817   md->match_function_type = 0;
6818   md->end_offset_top = 0;
6819   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6820   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6821
6822   switch(rc)
6823     {
6824     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6825     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6826     entirely. The only way we can do that is to re-do the match at the same
6827     point, with a flag to force SKIP with an argument to be ignored. Just
6828     treating this case as NOMATCH does not work because it does not check other
6829     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6830
6831     case MATCH_SKIP_ARG:
6832     new_start_match = start_match;
6833     md->ignore_skip_arg = TRUE;
6834     break;
6835
6836     /* SKIP passes back the next starting point explicitly, but if it is the
6837     same as the match we have just done, treat it as NOMATCH. */
6838
6839     case MATCH_SKIP:
6840     if (md->start_match_ptr != start_match)
6841       {
6842       new_start_match = md->start_match_ptr;
6843       break;
6844       }
6845     /* Fall through */
6846
6847     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6848     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
6849
6850     case MATCH_NOMATCH:
6851     case MATCH_PRUNE:
6852     case MATCH_THEN:
6853     md->ignore_skip_arg = FALSE;
6854     new_start_match = start_match + 1;
6855 #ifdef SUPPORT_UTF
6856     if (utf)
6857       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6858         new_start_match++);
6859 #endif
6860     break;
6861
6862     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6863
6864     case MATCH_COMMIT:
6865     rc = MATCH_NOMATCH;
6866     goto ENDLOOP;
6867
6868     /* Any other return is either a match, or some kind of error. */
6869
6870     default:
6871     goto ENDLOOP;
6872     }
6873
6874   /* Control reaches here for the various types of "no match at this point"
6875   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6876
6877   rc = MATCH_NOMATCH;
6878
6879   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6880   newline in the subject (though it may continue over the newline). Therefore,
6881   if we have just failed to match, starting at a newline, do not continue. */
6882
6883   if (firstline && IS_NEWLINE(start_match)) break;
6884
6885   /* Advance to new matching position */
6886
6887   start_match = new_start_match;
6888
6889   /* Break the loop if the pattern is anchored or if we have passed the end of
6890   the subject. */
6891
6892   if (anchored || start_match > end_subject) break;
6893
6894   /* If we have just passed a CR and we are now at a LF, and the pattern does
6895   not contain any explicit matches for \r or \n, and the newline option is CRLF
6896   or ANY or ANYCRLF, advance the match position by one more character. In
6897   normal matching start_match will always be greater than the first position at
6898   this stage, but a failed *SKIP can cause a return at the same point, which is
6899   why the first test exists. */
6900
6901   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6902       start_match[-1] == CHAR_CR &&
6903       start_match < end_subject &&
6904       *start_match == CHAR_NL &&
6905       (re->flags & PCRE_HASCRORLF) == 0 &&
6906         (md->nltype == NLTYPE_ANY ||
6907          md->nltype == NLTYPE_ANYCRLF ||
6908          md->nllen == 2))
6909     start_match++;
6910
6911   md->mark = NULL;   /* Reset for start of next match attempt */
6912   }                  /* End of for(;;) "bumpalong" loop */
6913
6914 /* ==========================================================================*/
6915
6916 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6917 conditions is true:
6918
6919 (1) The pattern is anchored or the match was failed by (*COMMIT);
6920
6921 (2) We are past the end of the subject;
6922
6923 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6924     this option requests that a match occur at or before the first newline in
6925     the subject.
6926
6927 When we have a match and the offset vector is big enough to deal with any
6928 backreferences, captured substring offsets will already be set up. In the case
6929 where we had to get some local store to hold offsets for backreference
6930 processing, copy those that we can. In this case there need not be overflow if
6931 certain parts of the pattern were not used, even though there are more
6932 capturing parentheses than vector slots. */
6933
6934 ENDLOOP:
6935
6936 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6937   {
6938   if (using_temporary_offsets)
6939     {
6940     if (arg_offset_max >= 4)
6941       {
6942       memcpy(offsets + 2, md->offset_vector + 2,
6943         (arg_offset_max - 2) * sizeof(int));
6944       DPRINTF(("Copied offsets from temporary memory\n"));
6945       }
6946     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6947     DPRINTF(("Freeing temporary memory\n"));
6948     (PUBL(free))(md->offset_vector);
6949     }
6950
6951   /* Set the return code to the number of captured strings, or 0 if there were
6952   too many to fit into the vector. */
6953
6954   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6955     0 : md->end_offset_top/2;
6956
6957   /* If there is space in the offset vector, set any unused pairs at the end of
6958   the pattern to -1 for backwards compatibility. It is documented that this
6959   happens. In earlier versions, the whole set of potential capturing offsets
6960   was set to -1 each time round the loop, but this is handled differently now.
6961   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6962   those at the end that need unsetting here. We can't just unset them all at
6963   the start of the whole thing because they may get set in one branch that is
6964   not the final matching branch. */
6965
6966   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
6967     {
6968     register int *iptr, *iend;
6969     int resetcount = 2 + re->top_bracket * 2;
6970     if (resetcount > offsetcount) resetcount = offsetcount;
6971     iptr = offsets + md->end_offset_top;
6972     iend = offsets + resetcount;
6973     while (iptr < iend) *iptr++ = -1;
6974     }
6975
6976   /* If there is space, set up the whole thing as substring 0. The value of
6977   md->start_match_ptr might be modified if \K was encountered on the successful
6978   matching path. */
6979
6980   if (offsetcount < 2) rc = 0; else
6981     {
6982     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6983     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6984     }
6985
6986   /* Return MARK data if requested */
6987
6988   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6989     *(extra_data->mark) = (pcre_uchar *)md->mark;
6990   DPRINTF((">>>> returning %d\n", rc));
6991 #ifdef NO_RECURSE
6992   release_match_heapframes(&frame_zero);
6993 #endif
6994   return rc;
6995   }
6996
6997 /* Control gets here if there has been an error, or if the overall match
6998 attempt has failed at all permitted starting positions. */
6999
7000 if (using_temporary_offsets)
7001   {
7002   DPRINTF(("Freeing temporary memory\n"));
7003   (PUBL(free))(md->offset_vector);
7004   }
7005
7006 /* For anything other than nomatch or partial match, just return the code. */
7007
7008 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7009   {
7010   DPRINTF((">>>> error: returning %d\n", rc));
7011 #ifdef NO_RECURSE
7012   release_match_heapframes(&frame_zero);
7013 #endif
7014   return rc;
7015   }
7016
7017 /* Handle partial matches - disable any mark data */
7018
7019 if (start_partial != NULL)
7020   {
7021   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7022   md->mark = NULL;
7023   if (offsetcount > 1)
7024     {
7025     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7026     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7027     }
7028   rc = PCRE_ERROR_PARTIAL;
7029   }
7030
7031 /* This is the classic nomatch case */
7032
7033 else
7034   {
7035   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7036   rc = PCRE_ERROR_NOMATCH;
7037   }
7038
7039 /* Return the MARK data if it has been requested. */
7040
7041 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7042   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7043 #ifdef NO_RECURSE
7044   release_match_heapframes(&frame_zero);
7045 #endif
7046 return rc;
7047 }
7048
7049 /* End of pcre_exec.c */