js/src/jsregexp.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2  * vim: set sw=4 ts=8 et tw=78:
   3  *
   4  * ***** BEGIN LICENSE BLOCK *****
   5  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version
   8  * 1.1 (the "License"); you may not use this file except in compliance with
   9  * the License. You may obtain a copy of the License at
  10  * http://www.mozilla.org/MPL/
  11  *
  12  * Software distributed under the License is distributed on an "AS IS" basis,
  13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14  * for the specific language governing rights and limitations under the
  15  * License.
  16  *
  17  * The Original Code is Mozilla Communicator client code, released
  18  * March 31, 1998.
  19  *
  20  * The Initial Developer of the Original Code is
  21  * Netscape Communications Corporation.
  22  * Portions created by the Initial Developer are Copyright (C) 1998
  23  * the Initial Developer. All Rights Reserved.
  24  *
  25  * Contributor(s):
  26  *
  27  * Alternatively, the contents of this file may be used under the terms of
  28  * either of the GNU General Public License Version 2 or later (the "GPL"),
  29  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30  * in which case the provisions of the GPL or the LGPL are applicable instead
  31  * of those above. If you wish to allow use of your version of this file only
  32  * under the terms of either the GPL or the LGPL, and not to allow others to
  33  * use your version of this file under the terms of the MPL, indicate your
  34  * decision by deleting the provisions above and replace them with the notice
  35  * and other provisions required by the GPL or the LGPL. If you do not delete
  36  * the provisions above, a recipient may use your version of this file under
  37  * the terms of any one of the MPL, the GPL or the LGPL.
  38  *
  39  * ***** END LICENSE BLOCK ***** */
  40
  41 /*
  42  * JS regular expressions, after Perl.
  43  */
  44 #include <stdlib.h>
  45 #include <string.h>
  46 #include <stdarg.h>
  47 #include "jstypes.h"
  48 #include "jsstdint.h"
  49 #include "jsarena.h" /* Added by JSIFY */
  50 #include "jsutil.h" /* Added by JSIFY */
  51 #include "jsapi.h"
  52 #include "jsarray.h"
  53 #include "jsatom.h"
  54 #include "jsbuiltins.h"
  55 #include "jscntxt.h"
  56 #include "jsversion.h"
  57 #include "jsfun.h"
  58 #include "jsgc.h"
  59 #include "jsinterp.h"
  60 #include "jslock.h"
  61 #include "jsnum.h"
  62 #include "jsobj.h"
  63 #include "jsopcode.h"
  64 #include "jsregexp.h"
  65 #include "jsscan.h"
  66 #include "jsscope.h"
  67 #include "jsstaticcheck.h"
  68 #include "jsstr.h"
  69 #include "jsvector.h"
  70
  71 #ifdef JS_TRACER
  72 #include "jstracer.h"
  73 using namespace avmplus;
  74 using namespace nanojit;
  75 #endif
  76
  77 #include "jsobjinlines.h"
  78
  79 using namespace js;
  80
  81 typedef enum REOp {
  82 #define REOP_DEF(opcode, name) opcode,
  83 #include "jsreops.tbl"
  84 #undef REOP_DEF
  85     REOP_LIMIT /* META: no operator >= to this */
  86 } REOp;
  87
  88 #define REOP_IS_SIMPLE(op)  ((op) <= REOP_NCLASS)
  89
  90 #ifdef REGEXP_DEBUG
  91 const char *reop_names[] = {
  92 #define REOP_DEF(opcode, name) name,
  93 #include "jsreops.tbl"
  94 #undef REOP_DEF
  95     NULL
  96 };
  97 #endif
  98
  99 #ifdef __GNUC__
 100 static int
 101 re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
 102 #endif
 103
 104 #ifdef REGEXP_DEBUG
 105 static int
 106 re_debug(const char *fmt, ...)
 107 {
 108     va_list ap;
 109     int retval;
 110
 111     va_start(ap, fmt);
 112     retval = vprintf(fmt, ap);
 113     va_end(ap);
 114     return retval;
 115 }
 116
 117 static void
 118 re_debug_chars(const jschar *chrs, size_t length)
 119 {
 120     int i = 0;
 121
 122     printf(" \"");
 123     while (*chrs && i++ < length) {
 124         putchar((char)*chrs++);
 125     }
 126     printf("\"");
 127 }
 128 #else  /* !REGEXP_DEBUG */
 129 /* This should be optimized to a no-op by our tier-1 compilers. */
 130 static int
 131 re_debug(const char *fmt, ...)
 132 {
 133     return 0;
 134 }
 135
 136 static void
 137 re_debug_chars(const jschar *chrs, size_t length)
 138 {
 139 }
 140 #endif /* !REGEXP_DEBUG */
 141
 142 struct RENode {
 143     REOp            op;         /* r.e. op bytecode */
 144     RENode          *next;      /* next in concatenation order */
 145     void            *kid;       /* first operand */
 146     union {
 147         void        *kid2;      /* second operand */
 148         jsint       num;        /* could be a number */
 149         size_t      parenIndex; /* or a parenthesis index */
 150         struct {                /* or a quantifier range */
 151             uintN  min;
 152             uintN  max;
 153             JSPackedBool greedy;
 154         } range;
 155         struct {                /* or a character class */
 156             size_t  startIndex;
 157             size_t  kidlen;     /* length of string at kid, in jschars */
 158             size_t  index;      /* index into class list */
 159             uint16  bmsize;     /* bitmap size, based on max char code */
 160             JSPackedBool sense;
 161         } ucclass;
 162         struct {                /* or a literal sequence */
 163             jschar  chr;        /* of one character */
 164             size_t  length;     /* or many (via the kid) */
 165         } flat;
 166         struct {
 167             RENode  *kid2;      /* second operand from ALT */
 168             jschar  ch1;        /* match char for ALTPREREQ */
 169             jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */
 170         } altprereq;
 171     } u;
 172 };
 173
 174 #define RE_IS_LETTER(c)     (((c >= 'A') && (c <= 'Z')) ||                    \
 175                              ((c >= 'a') && (c <= 'z')) )
 176 #define RE_IS_LINE_TERM(c)  ((c == '\n') || (c == '\r') ||                    \
 177                              (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
 178
 179 #define CLASS_CACHE_SIZE    4
 180
 181 typedef struct CompilerState {
 182     JSContext       *context;
 183     TokenStream     *tokenStream; /* For reporting errors */
 184     const jschar    *cpbegin;
 185     const jschar    *cpend;
 186     const jschar    *cp;
 187     size_t          parenCount;
 188     size_t          classCount;   /* number of [] encountered */
 189     size_t          treeDepth;    /* maximum depth of parse tree */
 190     size_t          progLength;   /* estimated bytecode length */
 191     RENode          *result;
 192     size_t          classBitmapsMem; /* memory to hold all class bitmaps */
 193     struct {
 194         const jschar *start;        /* small cache of class strings */
 195         size_t length;              /* since they're often the same */
 196         size_t index;
 197     } classCache[CLASS_CACHE_SIZE];
 198     uint16          flags;
 199 } CompilerState;
 200
 201 typedef struct EmitStateStackEntry {
 202     jsbytecode      *altHead;       /* start of REOP_ALT* opcode */
 203     jsbytecode      *nextAltFixup;  /* fixup pointer to next-alt offset */
 204     jsbytecode      *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
 205     jsbytecode      *endTermFixup;  /* fixup ptr. to REOPT_ALTPREREQ* offset */
 206     RENode          *continueNode;  /* original REOP_ALT* node being stacked */
 207     jsbytecode      continueOp;     /* REOP_JUMP or REOP_ENDALT continuation */
 208     JSPackedBool    jumpToJumpFlag; /* true if we've patched jump-to-jump to
 209                                        avoid 16-bit unsigned offset overflow */
 210 } EmitStateStackEntry;
 211
 212 /*
 213  * Immediate operand sizes and getter/setters.  Unlike the ones in jsopcode.h,
 214  * the getters and setters take the pc of the offset, not of the opcode before
 215  * the offset.
 216  */
 217 #define ARG_LEN             2
 218 #define GET_ARG(pc)         ((uint16)(((pc)[0] << 8) | (pc)[1]))
 219 #define SET_ARG(pc, arg)    ((pc)[0] = (jsbytecode) ((arg) >> 8),       \
 220                              (pc)[1] = (jsbytecode) (arg))
 221
 222 #define OFFSET_LEN          ARG_LEN
 223 #define OFFSET_MAX          (JS_BIT(ARG_LEN * 8) - 1)
 224 #define GET_OFFSET(pc)      GET_ARG(pc)
 225
 226 /*
 227  * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
 228  * For sanity, we limit it to 2^24 bytes.
 229  */
 230 #define TREE_DEPTH_MAX  (JS_BIT(24) / sizeof(EmitStateStackEntry))
 231
 232 /*
 233  * The maximum memory that can be allocated for class bitmaps.
 234  * For sanity, we limit it to 2^24 bytes.
 235  */
 236 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
 237
 238 /*
 239  * Functions to get size and write/read bytecode that represent small indexes
 240  * compactly.
 241  * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
 242  * indicates that the following byte brings more bits to the index. Otherwise
 243  * this is the last byte in the index bytecode representing highest index bits.
 244  */
 245 static size_t
 246 GetCompactIndexWidth(size_t index)
 247 {
 248     size_t width;
 249
 250     for (width = 1; (index >>= 7) != 0; ++width) { }
 251     return width;
 252 }
 253
 254 static JS_ALWAYS_INLINE jsbytecode *
 255 WriteCompactIndex(jsbytecode *pc, size_t index)
 256 {
 257     size_t next;
 258
 259     while ((next = index >> 7) != 0) {
 260         *pc++ = (jsbytecode)(index | 0x80);
 261         index = next;
 262     }
 263     *pc++ = (jsbytecode)index;
 264     return pc;
 265 }
 266
 267 static JS_ALWAYS_INLINE jsbytecode *
 268 ReadCompactIndex(jsbytecode *pc, size_t *result)
 269 {
 270     size_t nextByte;
 271
 272     nextByte = *pc++;
 273     if ((nextByte & 0x80) == 0) {
 274         /*
 275          * Short-circuit the most common case when compact index <= 127.
 276          */
 277         *result = nextByte;
 278     } else {
 279         size_t shift = 7;
 280         *result = 0x7F & nextByte;
 281         do {
 282             nextByte = *pc++;
 283             *result |= (nextByte & 0x7F) << shift;
 284             shift += 7;
 285         } while ((nextByte & 0x80) != 0);
 286     }
 287     return pc;
 288 }
 289
 290 typedef struct RECapture {
 291     ptrdiff_t index;           /* start of contents, -1 for empty  */
 292     size_t length;             /* length of capture */
 293 } RECapture;
 294
 295 typedef struct REMatchState {
 296     const jschar *cp;
 297     RECapture parens[1];      /* first of 're->parenCount' captures,
 298                                  allocated at end of this struct */
 299 } REMatchState;
 300
 301 struct REBackTrackData;
 302
 303 typedef struct REProgState {
 304     jsbytecode *continue_pc;        /* current continuation data */
 305     jsbytecode continue_op;
 306     ptrdiff_t index;                /* progress in text */
 307     size_t parenSoFar;              /* highest indexed paren started */
 308     union {
 309         struct {
 310             uintN min;             /* current quantifier limits */
 311             uintN max;
 312         } quantifier;
 313         struct {
 314             size_t top;             /* backtrack stack state */
 315             size_t sz;
 316         } assertion;
 317     } u;
 318 } REProgState;
 319
 320 typedef struct REBackTrackData {
 321     size_t sz;                      /* size of previous stack entry */
 322     jsbytecode *backtrack_pc;       /* where to backtrack to */
 323     jsbytecode backtrack_op;
 324     const jschar *cp;               /* index in text of match at backtrack */
 325     size_t parenIndex;              /* start index of saved paren contents */
 326     size_t parenCount;              /* # of saved paren contents */
 327     size_t saveStateStackTop;       /* number of parent states */
 328     /* saved parent states follow */
 329     /* saved paren contents follow */
 330 } REBackTrackData;
 331
 332 #define INITIAL_STATESTACK  100
 333 #define INITIAL_BACKTRACK   8000
 334
 335 typedef struct REGlobalData {
 336     JSContext *cx;
 337     JSRegExp *regexp;               /* the RE in execution */
 338     JSBool ok;                      /* runtime error (out_of_memory only?) */
 339     size_t start;                   /* offset to start at */
 340     ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
 341     const jschar    *cpbegin;       /* text base address */
 342     const jschar    *cpend;         /* text limit address */
 343
 344     REProgState *stateStack;        /* stack of state of current parents */
 345     size_t stateStackTop;
 346     size_t stateStackLimit;
 347
 348     REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
 349     REBackTrackData *backTrackSP;
 350     size_t backTrackStackSize;
 351     size_t cursz;                   /* size of current stack entry */
 352     size_t backTrackCount;          /* how many times we've backtracked */
 353     size_t backTrackLimit;          /* upper limit on backtrack states */
 354 } REGlobalData;
 355
 356 void
 357 JSRegExpStatics::clearRoots()
 358 {
 359     input = NULL;
 360     cx->runtime->gcPoke = JS_TRUE;
 361 }
 362
 363 bool
 364 JSRegExpStatics::copy(const JSRegExpStatics& other)
 365 {
 366     clearRoots();
 367     input = other.input;
 368     multiline = other.multiline;
 369     lastMatch = other.lastMatch;
 370     lastParen = other.lastParen;
 371     leftContext = other.leftContext;
 372     rightContext = other.rightContext;
 373     if (!parens.resize(other.parens.length()))
 374         return false;
 375     memcpy(parens.begin(), other.parens.begin(), sizeof(JSSubString) * parens.length());
 376     return true;
 377 }
 378
 379 void
 380 JSRegExpStatics::clear()
 381 {
 382     clearRoots();
 383     multiline = false;
 384     lastMatch = lastParen = leftContext = rightContext = js_EmptySubString;
 385     parens.clear();
 386 }
 387
 388 /*
 389  * 1. If IgnoreCase is false, return ch.
 390  * 2. Let u be ch converted to upper case as if by calling
 391  *    String.prototype.toUpperCase on the one-character string ch.
 392  * 3. If u does not consist of a single character, return ch.
 393  * 4. Let cu be u's character.
 394  * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
 395  *    code point value is less than decimal 128, then return ch.
 396  * 6. Return cu.
 397  */
 398 static JS_ALWAYS_INLINE uintN
 399 upcase(uintN ch)
 400 {
 401     uintN cu;
 402
 403     JS_ASSERT((uintN) (jschar) ch == ch);
 404     if (ch < 128) {
 405         if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
 406             ch -= (uintN) ('a' - 'A');
 407         return ch;
 408     }
 409
 410     cu = JS_TOUPPER(ch);
 411     return (cu < 128) ? ch : cu;
 412 }
 413
 414 /*
 415  * Return the 'canonical' inverse upcase of |ch|. That is the character
 416  * |lch| such that |upcase(lch) == ch| and (|lch| is the lower-case form
 417  * of |ch| or is |ch|).
 418  */
 419 static inline jschar inverse_upcase(jschar ch)
 420 {
 421     jschar lch = JS_TOLOWER(ch);
 422     return (upcase(lch) == ch) ? lch : ch;
 423 }
 424
 425 /* Construct and initialize an RENode, returning NULL for out-of-memory */
 426 static RENode *
 427 NewRENode(CompilerState *state, REOp op)
 428 {
 429     JSContext *cx;
 430     RENode *ren;
 431
 432     cx = state->context;
 433     cx->tempPool.allocateCast<RENode *>(ren, sizeof *ren);
 434     if (!ren) {
 435         js_ReportOutOfScriptQuota(cx);
 436         return NULL;
 437     }
 438     ren->op = op;
 439     ren->next = NULL;
 440     ren->kid = NULL;
 441     return ren;
 442 }
 443
 444 /*
 445  * Validates and converts hex ascii value.
 446  */
 447 static JSBool
 448 isASCIIHexDigit(jschar c, uintN *digit)
 449 {
 450     uintN cv = c;
 451
 452     if (cv < '0')
 453         return JS_FALSE;
 454     if (cv <= '9') {
 455         *digit = cv - '0';
 456         return JS_TRUE;
 457     }
 458     cv |= 0x20;
 459     if (cv >= 'a' && cv <= 'f') {
 460         *digit = cv - 'a' + 10;
 461         return JS_TRUE;
 462     }
 463     return JS_FALSE;
 464 }
 465
 466
 467 typedef struct {
 468     REOp op;
 469     const jschar *errPos;
 470     size_t parenIndex;
 471 } REOpData;
 472
 473 static JSBool
 474 ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
 475                         const jschar *arg)
 476 {
 477     if (state->tokenStream) {
 478         return ReportCompileErrorNumber(state->context, state->tokenStream,
 479                                         NULL, JSREPORT_UC | flags, errorNumber, arg);
 480     }
 481     return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
 482                                           js_GetErrorMessage, NULL,
 483                                           errorNumber, arg);
 484 }
 485
 486 static JSBool
 487 ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
 488 {
 489     return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
 490 }
 491
 492 /*
 493  * Process the op against the two top operands, reducing them to a single
 494  * operand in the penultimate slot. Update progLength and treeDepth.
 495  */
 496 static JSBool
 497 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
 498           intN operandSP)
 499 {
 500     RENode *result;
 501
 502     switch (opData->op) {
 503       case REOP_ALT:
 504         result = NewRENode(state, REOP_ALT);
 505         if (!result)
 506             return JS_FALSE;
 507         result->kid = operandStack[operandSP - 2];
 508         result->u.kid2 = operandStack[operandSP - 1];
 509         operandStack[operandSP - 2] = result;
 510
 511         if (state->treeDepth == TREE_DEPTH_MAX) {
 512             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
 513             return JS_FALSE;
 514         }
 515         ++state->treeDepth;
 516
 517         /*
 518          * Look at both alternates to see if there's a FLAT or a CLASS at
 519          * the start of each. If so, use a prerequisite match.
 520          */
 521         if (((RENode *) result->kid)->op == REOP_FLAT &&
 522             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
 523             (state->flags & JSREG_FOLD) == 0) {
 524             result->op = REOP_ALTPREREQ;
 525             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
 526             result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
 527             /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
 528                                             JUMP, <end> ... ENDALT */
 529             state->progLength += 13;
 530         }
 531         else
 532         if (((RENode *) result->kid)->op == REOP_CLASS &&
 533             ((RENode *) result->kid)->u.ucclass.index < 256 &&
 534             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
 535             (state->flags & JSREG_FOLD) == 0) {
 536             result->op = REOP_ALTPREREQ2;
 537             result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
 538             result->u.altprereq.ch2 = jschar(((RENode *) result->kid)->u.ucclass.index);
 539             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
 540                                             JUMP, <end> ... ENDALT */
 541             state->progLength += 13;
 542         }
 543         else
 544         if (((RENode *) result->kid)->op == REOP_FLAT &&
 545             ((RENode *) result->u.kid2)->op == REOP_CLASS &&
 546             ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
 547             (state->flags & JSREG_FOLD) == 0) {
 548             result->op = REOP_ALTPREREQ2;
 549             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
 550             result->u.altprereq.ch2 =
 551                 jschar(((RENode *) result->u.kid2)->u.ucclass.index);
 552             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
 553                                           JUMP, <end> ... ENDALT */
 554             state->progLength += 13;
 555         }
 556         else {
 557             /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
 558             state->progLength += 7;
 559         }
 560         break;
 561
 562       case REOP_CONCAT:
 563         result = operandStack[operandSP - 2];
 564         while (result->next)
 565             result = result->next;
 566         result->next = operandStack[operandSP - 1];
 567         break;
 568
 569       case REOP_ASSERT:
 570       case REOP_ASSERT_NOT:
 571       case REOP_LPARENNON:
 572       case REOP_LPAREN:
 573         /* These should have been processed by a close paren. */
 574         ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
 575                                 opData->errPos);
 576         return JS_FALSE;
 577
 578       default:;
 579     }
 580     return JS_TRUE;
 581 }
 582
 583 /*
 584  * Parser forward declarations.
 585  */
 586 static JSBool ParseTerm(CompilerState *state);
 587 static JSBool ParseQuantifier(CompilerState *state);
 588 static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
 589
 590 /*
 591  * Top-down regular expression grammar, based closely on Perl4.
 592  *
 593  *  regexp:     altern                  A regular expression is one or more
 594  *              altern '|' regexp       alternatives separated by vertical bar.
 595  */
 596 #define INITIAL_STACK_SIZE  128
 597
 598 static JSBool
 599 ParseRegExp(CompilerState *state)
 600 {
 601     size_t parenIndex;
 602     RENode *operand;
 603     REOpData *operatorStack;
 604     RENode **operandStack;
 605     REOp op;
 606     intN i;
 607     JSBool result = JS_FALSE;
 608
 609     intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
 610     intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
 611
 612     /* Watch out for empty regexp */
 613     if (state->cp == state->cpend) {
 614         state->result = NewRENode(state, REOP_EMPTY);
 615         return (state->result != NULL);
 616     }
 617
 618     operatorStack = (REOpData *)
 619         state->context->malloc(sizeof(REOpData) * operatorStackSize);
 620     if (!operatorStack)
 621         return JS_FALSE;
 622
 623     operandStack = (RENode **)
 624         state->context->malloc(sizeof(RENode *) * operandStackSize);
 625     if (!operandStack)
 626         goto out;
 627
 628     for (;;) {
 629         parenIndex = state->parenCount;
 630         if (state->cp == state->cpend) {
 631             /*
 632              * If we are at the end of the regexp and we're short one or more
 633              * operands, the regexp must have the form /x|/ or some such, with
 634              * left parentheses making us short more than one operand.
 635              */
 636             if (operatorSP >= operandSP) {
 637                 operand = NewRENode(state, REOP_EMPTY);
 638                 if (!operand)
 639                     goto out;
 640                 goto pushOperand;
 641             }
 642         } else {
 643             switch (*state->cp) {
 644               case '(':
 645                 ++state->cp;
 646                 if (state->cp + 1 < state->cpend &&
 647                     *state->cp == '?' &&
 648                     (state->cp[1] == '=' ||
 649                      state->cp[1] == '!' ||
 650                      state->cp[1] == ':')) {
 651                     switch (state->cp[1]) {
 652                       case '=':
 653                         op = REOP_ASSERT;
 654                         /* ASSERT, <next>, ... ASSERTTEST */
 655                         state->progLength += 4;
 656                         break;
 657                       case '!':
 658                         op = REOP_ASSERT_NOT;
 659                         /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
 660                         state->progLength += 4;
 661                         break;
 662                       default:
 663                         op = REOP_LPARENNON;
 664                         break;
 665                     }
 666                     state->cp += 2;
 667                 } else {
 668                     op = REOP_LPAREN;
 669                     /* LPAREN, <index>, ... RPAREN, <index> */
 670                     state->progLength
 671                         += 2 * (1 + GetCompactIndexWidth(parenIndex));
 672                     state->parenCount++;
 673                     if (state->parenCount == 65535) {
 674                         ReportRegExpError(state, JSREPORT_ERROR,
 675                                           JSMSG_TOO_MANY_PARENS);
 676                         goto out;
 677                     }
 678                 }
 679                 goto pushOperator;
 680
 681               case ')':
 682                 /*
 683                  * If there's no stacked open parenthesis, throw syntax error.
 684                  */
 685                 for (i = operatorSP - 1; ; i--) {
 686                     if (i < 0) {
 687                         ReportRegExpError(state, JSREPORT_ERROR,
 688                                           JSMSG_UNMATCHED_RIGHT_PAREN);
 689                         goto out;
 690                     }
 691                     if (operatorStack[i].op == REOP_ASSERT ||
 692                         operatorStack[i].op == REOP_ASSERT_NOT ||
 693                         operatorStack[i].op == REOP_LPARENNON ||
 694                         operatorStack[i].op == REOP_LPAREN) {
 695                         break;
 696                     }
 697                 }
 698                 /* FALL THROUGH */
 699
 700               case '|':
 701                 /* Expected an operand before these, so make an empty one */
 702                 operand = NewRENode(state, REOP_EMPTY);
 703                 if (!operand)
 704                     goto out;
 705                 goto pushOperand;
 706
 707               default:
 708                 if (!ParseTerm(state))
 709                     goto out;
 710                 operand = state->result;
 711 pushOperand:
 712                 if (operandSP == operandStackSize) {
 713                     RENode **tmp;
 714                     operandStackSize += operandStackSize;
 715                     tmp = (RENode **)
 716                         state->context->realloc(operandStack,
 717                                                 sizeof(RENode *) * operandStackSize);
 718                     if (!tmp)
 719                         goto out;
 720                     operandStack = tmp;
 721                 }
 722                 operandStack[operandSP++] = operand;
 723                 break;
 724             }
 725         }
 726
 727         /* At the end; process remaining operators. */
 728 restartOperator:
 729         if (state->cp == state->cpend) {
 730             while (operatorSP) {
 731                 --operatorSP;
 732                 if (!ProcessOp(state, &operatorStack[operatorSP],
 733                                operandStack, operandSP))
 734                     goto out;
 735                 --operandSP;
 736             }
 737             JS_ASSERT(operandSP == 1);
 738             state->result = operandStack[0];
 739             result = JS_TRUE;
 740             goto out;
 741         }
 742
 743         switch (*state->cp) {
 744           case '|':
 745             /* Process any stacked 'concat' operators */
 746             ++state->cp;
 747             while (operatorSP &&
 748                    operatorStack[operatorSP - 1].op == REOP_CONCAT) {
 749                 --operatorSP;
 750                 if (!ProcessOp(state, &operatorStack[operatorSP],
 751                                operandStack, operandSP)) {
 752                     goto out;
 753                 }
 754                 --operandSP;
 755             }
 756             op = REOP_ALT;
 757             goto pushOperator;
 758
 759           case ')':
 760             /*
 761              * If there's no stacked open parenthesis, throw syntax error.
 762              */
 763             for (i = operatorSP - 1; ; i--) {
 764                 if (i < 0) {
 765                     ReportRegExpError(state, JSREPORT_ERROR,
 766                                       JSMSG_UNMATCHED_RIGHT_PAREN);
 767                     goto out;
 768                 }
 769                 if (operatorStack[i].op == REOP_ASSERT ||
 770                     operatorStack[i].op == REOP_ASSERT_NOT ||
 771                     operatorStack[i].op == REOP_LPARENNON ||
 772                     operatorStack[i].op == REOP_LPAREN) {
 773                     break;
 774                 }
 775             }
 776             ++state->cp;
 777
 778             /* Process everything on the stack until the open parenthesis. */
 779             for (;;) {
 780                 JS_ASSERT(operatorSP);
 781                 --operatorSP;
 782                 switch (operatorStack[operatorSP].op) {
 783                   case REOP_ASSERT:
 784                   case REOP_ASSERT_NOT:
 785                   case REOP_LPAREN:
 786                     operand = NewRENode(state, operatorStack[operatorSP].op);
 787                     if (!operand)
 788                         goto out;
 789                     operand->u.parenIndex =
 790                         operatorStack[operatorSP].parenIndex;
 791                     JS_ASSERT(operandSP);
 792                     operand->kid = operandStack[operandSP - 1];
 793                     operandStack[operandSP - 1] = operand;
 794                     if (state->treeDepth == TREE_DEPTH_MAX) {
 795                         ReportRegExpError(state, JSREPORT_ERROR,
 796                                           JSMSG_REGEXP_TOO_COMPLEX);
 797                         goto out;
 798                     }
 799                     ++state->treeDepth;
 800                     /* FALL THROUGH */
 801
 802                   case REOP_LPARENNON:
 803                     state->result = operandStack[operandSP - 1];
 804                     if (!ParseQuantifier(state))
 805                         goto out;
 806                     operandStack[operandSP - 1] = state->result;
 807                     goto restartOperator;
 808                   default:
 809                     if (!ProcessOp(state, &operatorStack[operatorSP],
 810                                    operandStack, operandSP))
 811                         goto out;
 812                     --operandSP;
 813                     break;
 814                 }
 815             }
 816             break;
 817
 818           case '{':
 819           {
 820             const jschar *errp = state->cp;
 821
 822             if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
 823                 /*
 824                  * This didn't even scan correctly as a quantifier, so we should
 825                  * treat it as flat.
 826                  */
 827                 op = REOP_CONCAT;
 828                 goto pushOperator;
 829             }
 830
 831             state->cp = errp;
 832             /* FALL THROUGH */
 833           }
 834
 835           case '+':
 836           case '*':
 837           case '?':
 838             ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
 839                                     state->cp);
 840             result = JS_FALSE;
 841             goto out;
 842
 843           default:
 844             /* Anything else is the start of the next term. */
 845             op = REOP_CONCAT;
 846 pushOperator:
 847             if (operatorSP == operatorStackSize) {
 848                 REOpData *tmp;
 849                 operatorStackSize += operatorStackSize;
 850                 tmp = (REOpData *)
 851                     state->context->realloc(operatorStack,
 852                                             sizeof(REOpData) * operatorStackSize);
 853                 if (!tmp)
 854                     goto out;
 855                 operatorStack = tmp;
 856             }
 857             operatorStack[operatorSP].op = op;
 858             operatorStack[operatorSP].errPos = state->cp;
 859             operatorStack[operatorSP++].parenIndex = parenIndex;
 860             break;
 861         }
 862     }
 863 out:
 864     if (operatorStack)
 865         state->context->free(operatorStack);
 866     if (operandStack)
 867         state->context->free(operandStack);
 868     return result;
 869 }
 870
 871 /*
 872  * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
 873  * its being on the stack, and to propagate errors to its callers.
 874  */
 875 #define JSREG_FIND_PAREN_COUNT  0x8000
 876 #define JSREG_FIND_PAREN_ERROR  0x4000
 877
 878 /*
 879  * Magic return value from FindParenCount and GetDecimalValue, to indicate
 880  * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
 881  * its findMax parameter is non-null.
 882  */
 883 #define OVERFLOW_VALUE          ((uintN)-1)
 884
 885 static uintN
 886 FindParenCount(CompilerState *state)
 887 {
 888     CompilerState temp;
 889     int i;
 890
 891     if (state->flags & JSREG_FIND_PAREN_COUNT)
 892         return OVERFLOW_VALUE;
 893
 894     /*
 895      * Copy state into temp, flag it so we never report an invalid backref,
 896      * and reset its members to parse the entire regexp.  This is obviously
 897      * suboptimal, but GetDecimalValue calls us only if a backref appears to
 898      * refer to a forward parenthetical, which is rare.
 899      */
 900     temp = *state;
 901     temp.flags |= JSREG_FIND_PAREN_COUNT;
 902     temp.cp = temp.cpbegin;
 903     temp.parenCount = 0;
 904     temp.classCount = 0;
 905     temp.progLength = 0;
 906     temp.treeDepth = 0;
 907     temp.classBitmapsMem = 0;
 908     for (i = 0; i < CLASS_CACHE_SIZE; i++)
 909         temp.classCache[i].start = NULL;
 910
 911     if (!ParseRegExp(&temp)) {
 912         state->flags |= JSREG_FIND_PAREN_ERROR;
 913         return OVERFLOW_VALUE;
 914     }
 915     return temp.parenCount;
 916 }
 917
 918 /*
 919  * Extract and return a decimal value at state->cp.  The initial character c
 920  * has already been read.  Return OVERFLOW_VALUE if the result exceeds max.
 921  * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
 922  * state->flags to discover whether an error occurred under findMax.
 923  */
 924 static uintN
 925 GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
 926                 CompilerState *state)
 927 {
 928     uintN value = JS7_UNDEC(c);
 929     JSBool overflow = (value > max && (!findMax || value > findMax(state)));
 930
 931     /* The following restriction allows simpler overflow checks. */
 932     JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
 933     while (state->cp < state->cpend) {
 934         c = *state->cp;
 935         if (!JS7_ISDEC(c))
 936             break;
 937         value = 10 * value + JS7_UNDEC(c);
 938         if (!overflow && value > max && (!findMax || value > findMax(state)))
 939             overflow = JS_TRUE;
 940         ++state->cp;
 941     }
 942     return overflow ? OVERFLOW_VALUE : value;
 943 }
 944
 945 /*
 946  * Calculate the total size of the bitmap required for a class expression.
 947  */
 948 static JSBool
 949 CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
 950                     const jschar *end)
 951 {
 952     uintN max = 0;
 953     JSBool inRange = JS_FALSE;
 954     jschar c, rangeStart = 0;
 955     uintN n, digit, nDigits, i;
 956
 957     target->u.ucclass.bmsize = 0;
 958     target->u.ucclass.sense = JS_TRUE;
 959
 960     if (src == end)
 961         return JS_TRUE;
 962
 963     if (*src == '^') {
 964         ++src;
 965         target->u.ucclass.sense = JS_FALSE;
 966     }
 967
 968     while (src != end) {
 969         JSBool canStartRange = JS_TRUE;
 970         jschar localMax = 0;
 971
 972         switch (*src) {
 973           case '\\':
 974             ++src;
 975             c = *src++;
 976             switch (c) {
 977               case 'b':
 978                 localMax = 0x8;
 979                 break;
 980               case 'f':
 981                 localMax = 0xC;
 982                 break;
 983               case 'n':
 984                 localMax = 0xA;
 985                 break;
 986               case 'r':
 987                 localMax = 0xD;
 988                 break;
 989               case 't':
 990                 localMax = 0x9;
 991                 break;
 992               case 'v':
 993                 localMax = 0xB;
 994                 break;
 995               case 'c':
 996                 if (src < end && RE_IS_LETTER(*src)) {
 997                     localMax = (uintN) (*src++) & 0x1F;
 998                 } else {
 999                     --src;
1000                     localMax = '\\';
1001                 }
1002                 break;
1003               case 'x':
1004                 nDigits = 2;
1005                 goto lexHex;
1006               case 'u':
1007                 nDigits = 4;
1008 lexHex:
1009                 n = 0;
1010                 for (i = 0; (i < nDigits) && (src < end); i++) {
1011                     c = *src++;
1012                     if (!isASCIIHexDigit(c, &digit)) {
1013                         /*
1014                          * Back off to accepting the original
1015                          *'\' as a literal.
1016                          */
1017                         src -= i + 1;
1018                         n = '\\';
1019                         break;
1020                     }
1021                     n = (n << 4) | digit;
1022                 }
1023                 localMax = jschar(n);
1024                 break;
1025               case 'd':
1026                 canStartRange = JS_FALSE;
1027                 if (inRange) {
1028                     JS_ReportErrorNumber(state->context,
1029                                          js_GetErrorMessage, NULL,
1030                                          JSMSG_BAD_CLASS_RANGE);
1031                     return JS_FALSE;
1032                 }
1033                 localMax = '9';
1034                 break;
1035               case 'D':
1036               case 's':
1037               case 'S':
1038               case 'w':
1039               case 'W':
1040                 canStartRange = JS_FALSE;
1041                 if (inRange) {
1042                     JS_ReportErrorNumber(state->context,
1043                                          js_GetErrorMessage, NULL,
1044                                          JSMSG_BAD_CLASS_RANGE);
1045                     return JS_FALSE;
1046                 }
1047                 max = 65535;
1048
1049                 /*
1050                  * If this is the start of a range, ensure that it's less than
1051                  * the end.
1052                  */
1053                 localMax = 0;
1054                 break;
1055               case '0':
1056               case '1':
1057               case '2':
1058               case '3':
1059               case '4':
1060               case '5':
1061               case '6':
1062               case '7':
1063                 /*
1064                  *  This is a non-ECMA extension - decimal escapes (in this
1065                  *  case, octal!) are supposed to be an error inside class
1066                  *  ranges, but supported here for backwards compatibility.
1067                  *
1068                  */
1069                 n = JS7_UNDEC(c);
1070                 c = *src;
1071                 if ('0' <= c && c <= '7') {
1072                     src++;
1073                     n = 8 * n + JS7_UNDEC(c);
1074                     c = *src;
1075                     if ('0' <= c && c <= '7') {
1076                         src++;
1077                         i = 8 * n + JS7_UNDEC(c);
1078                         if (i <= 0377)
1079                             n = i;
1080                         else
1081                             src--;
1082                     }
1083                 }
1084                 localMax = jschar(n);
1085                 break;
1086
1087               default:
1088                 localMax = c;
1089                 break;
1090             }
1091             break;
1092           default:
1093             localMax = *src++;
1094             break;
1095         }
1096
1097         if (inRange) {
1098             /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1099             if (rangeStart > localMax) {
1100                 JS_ReportErrorNumber(state->context,
1101                                      js_GetErrorMessage, NULL,
1102                                      JSMSG_BAD_CLASS_RANGE);
1103                 return JS_FALSE;
1104             }
1105             inRange = JS_FALSE;
1106         } else {
1107             if (canStartRange && src < end - 1) {
1108                 if (*src == '-') {
1109                     ++src;
1110                     inRange = JS_TRUE;
1111                     rangeStart = (jschar)localMax;
1112                     continue;
1113                 }
1114             }
1115             if (state->flags & JSREG_FOLD)
1116                 rangeStart = localMax;   /* one run of the uc/dc loop below */
1117         }
1118
1119         if (state->flags & JSREG_FOLD) {
1120             jschar maxch = localMax;
1121
1122             for (i = rangeStart; i <= localMax; i++) {
1123                 jschar uch, dch;
1124
1125                 uch = jschar(upcase(i));
1126                 dch = inverse_upcase(jschar(i));
1127                 maxch = JS_MAX(maxch, uch);
1128                 maxch = JS_MAX(maxch, dch);
1129             }
1130             localMax = maxch;
1131         }
1132
1133         if (localMax > max)
1134             max = uintN(localMax);
1135     }
1136     target->u.ucclass.bmsize = uint16(max);
1137     return JS_TRUE;
1138 }
1139
1140 /*
1141  *  item:       assertion               An item is either an assertion or
1142  *              quantatom               a quantified atom.
1143  *
1144  *  assertion:  '^'                     Assertions match beginning of string
1145  *                                      (or line if the class static property
1146  *                                      RegExp.multiline is true).
1147  *              '$'                     End of string (or line if the class
1148  *                                      static property RegExp.multiline is
1149  *                                      true).
1150  *              '\b'                    Word boundary (between \w and \W).
1151  *              '\B'                    Word non-boundary.
1152  *
1153  *  quantatom:  atom                    An unquantified atom.
1154  *              quantatom '{' n ',' m '}'
1155  *                                      Atom must occur between n and m times.
1156  *              quantatom '{' n ',' '}' Atom must occur at least n times.
1157  *              quantatom '{' n '}'     Atom must occur exactly n times.
1158  *              quantatom '*'           Zero or more times (same as {0,}).
1159  *              quantatom '+'           One or more times (same as {1,}).
1160  *              quantatom '?'           Zero or one time (same as {0,1}).
1161  *
1162  *              any of which can be optionally followed by '?' for ungreedy
1163  *
1164  *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
1165  *                                      can be addressed using a backreference,
1166  *                                      see '\' n below).
1167  *              '.'                     Matches any char except '\n'.
1168  *              '[' classlist ']'       A character class.
1169  *              '[' '^' classlist ']'   A negated character class.
1170  *              '\f'                    Form Feed.
1171  *              '\n'                    Newline (Line Feed).
1172  *              '\r'                    Carriage Return.
1173  *              '\t'                    Horizontal Tab.
1174  *              '\v'                    Vertical Tab.
1175  *              '\d'                    A digit (same as [0-9]).
1176  *              '\D'                    A non-digit.
1177  *              '\w'                    A word character, [0-9a-z_A-Z].
1178  *              '\W'                    A non-word character.
1179  *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
1180  *              '\S'                    A non-whitespace character.
1181  *              '\' n                   A backreference to the nth (n decimal
1182  *                                      and positive) parenthesized expression.
1183  *              '\' octal               An octal escape sequence (octal must be
1184  *                                      two or three digits long, unless it is
1185  *                                      0 for the null character).
1186  *              '\x' hex                A hex escape (hex must be two digits).
1187  *              '\u' unicode            A unicode escape (must be four digits).
1188  *              '\c' ctrl               A control character, ctrl is a letter.
1189  *              '\' literalatomchar     Any character except one of the above
1190  *                                      that follow '\' in an atom.
1191  *              otheratomchar           Any character not first among the other
1192  *                                      atom right-hand sides.
1193  */
1194 static JSBool
1195 ParseTerm(CompilerState *state)
1196 {
1197     jschar c = *state->cp++;
1198     uintN nDigits;
1199     uintN num, tmp, n, i;
1200     const jschar *termStart;
1201
1202     switch (c) {
1203     /* assertions and atoms */
1204       case '^':
1205         state->result = NewRENode(state, REOP_BOL);
1206         if (!state->result)
1207             return JS_FALSE;
1208         state->progLength++;
1209         return JS_TRUE;
1210       case '$':
1211         state->result = NewRENode(state, REOP_EOL);
1212         if (!state->result)
1213             return JS_FALSE;
1214         state->progLength++;
1215         return JS_TRUE;
1216       case '\\':
1217         if (state->cp >= state->cpend) {
1218             /* a trailing '\' is an error */
1219             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1220             return JS_FALSE;
1221         }
1222         c = *state->cp++;
1223         switch (c) {
1224         /* assertion escapes */
1225           case 'b' :
1226             state->result = NewRENode(state, REOP_WBDRY);
1227             if (!state->result)
1228                 return JS_FALSE;
1229             state->progLength++;
1230             return JS_TRUE;
1231           case 'B':
1232             state->result = NewRENode(state, REOP_WNONBDRY);
1233             if (!state->result)
1234                 return JS_FALSE;
1235             state->progLength++;
1236             return JS_TRUE;
1237           /* Decimal escape */
1238           case '0':
1239             /* Give a strict warning. See also the note below. */
1240             if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
1241                                    JSMSG_INVALID_BACKREF)) {
1242                 return JS_FALSE;
1243             }
1244      doOctal:
1245             num = 0;
1246             while (state->cp < state->cpend) {
1247                 c = *state->cp;
1248                 if (c < '0' || '7' < c)
1249                     break;
1250                 state->cp++;
1251                 tmp = 8 * num + (uintN)JS7_UNDEC(c);
1252                 if (tmp > 0377)
1253                     break;
1254                 num = tmp;
1255             }
1256             c = (jschar)num;
1257     doFlat:
1258             state->result = NewRENode(state, REOP_FLAT);
1259             if (!state->result)
1260                 return JS_FALSE;
1261             state->result->u.flat.chr = c;
1262             state->result->u.flat.length = 1;
1263             state->progLength += 3;
1264             break;
1265           case '1':
1266           case '2':
1267           case '3':
1268           case '4':
1269           case '5':
1270           case '6':
1271           case '7':
1272           case '8':
1273           case '9':
1274             termStart = state->cp - 1;
1275             num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1276             if (state->flags & JSREG_FIND_PAREN_ERROR)
1277                 return JS_FALSE;
1278             if (num == OVERFLOW_VALUE) {
1279                 /* Give a strict mode warning. */
1280                 if (!ReportRegExpError(state,
1281                                        JSREPORT_WARNING | JSREPORT_STRICT,
1282                                        (c >= '8')
1283                                        ? JSMSG_INVALID_BACKREF
1284                                        : JSMSG_BAD_BACKREF)) {
1285                     return JS_FALSE;
1286                 }
1287
1288                 /*
1289                  * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1290                  * error here. However, for compatibility with IE, we treat the
1291                  * whole backref as flat if the first character in it is not a
1292                  * valid octal character, and as an octal escape otherwise.
1293                  */
1294                 state->cp = termStart;
1295                 if (c >= '8') {
1296                     /* Treat this as flat. termStart - 1 is the \. */
1297                     c = '\\';
1298                     goto asFlat;
1299                 }
1300
1301                 /* Treat this as an octal escape. */
1302                 goto doOctal;
1303             }
1304
1305             /*
1306              * When FindParenCount calls the regex parser recursively (to find
1307              * the number of backrefs) num can be arbitrary and the maximum
1308              * supported number of backrefs does not bound it.
1309              */
1310             JS_ASSERT_IF(!(state->flags & JSREG_FIND_PAREN_COUNT),
1311                          1 <= num && num <= 0x10000);
1312             state->result = NewRENode(state, REOP_BACKREF);
1313             if (!state->result)
1314                 return JS_FALSE;
1315             state->result->u.parenIndex = num - 1;
1316             state->progLength
1317                 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1318             break;
1319           /* Control escape */
1320           case 'f':
1321             c = 0xC;
1322             goto doFlat;
1323           case 'n':
1324             c = 0xA;
1325             goto doFlat;
1326           case 'r':
1327             c = 0xD;
1328             goto doFlat;
1329           case 't':
1330             c = 0x9;
1331             goto doFlat;
1332           case 'v':
1333             c = 0xB;
1334             goto doFlat;
1335           /* Control letter */
1336           case 'c':
1337             if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1338                 c = (jschar) (*state->cp++ & 0x1F);
1339             } else {
1340                 /* back off to accepting the original '\' as a literal */
1341                 --state->cp;
1342                 c = '\\';
1343             }
1344             goto doFlat;
1345           /* HexEscapeSequence */
1346           case 'x':
1347             nDigits = 2;
1348             goto lexHex;
1349           /* UnicodeEscapeSequence */
1350           case 'u':
1351             nDigits = 4;
1352 lexHex:
1353             n = 0;
1354             for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1355                 uintN digit;
1356                 c = *state->cp++;
1357                 if (!isASCIIHexDigit(c, &digit)) {
1358                     /*
1359                      * Back off to accepting the original 'u' or 'x' as a
1360                      * literal.
1361                      */
1362                     state->cp -= i + 2;
1363                     n = *state->cp++;
1364                     break;
1365                 }
1366                 n = (n << 4) | digit;
1367             }
1368             c = (jschar) n;
1369             goto doFlat;
1370           /* Character class escapes */
1371           case 'd':
1372             state->result = NewRENode(state, REOP_DIGIT);
1373 doSimple:
1374             if (!state->result)
1375                 return JS_FALSE;
1376             state->progLength++;
1377             break;
1378           case 'D':
1379             state->result = NewRENode(state, REOP_NONDIGIT);
1380             goto doSimple;
1381           case 's':
1382             state->result = NewRENode(state, REOP_SPACE);
1383             goto doSimple;
1384           case 'S':
1385             state->result = NewRENode(state, REOP_NONSPACE);
1386             goto doSimple;
1387           case 'w':
1388             state->result = NewRENode(state, REOP_ALNUM);
1389             goto doSimple;
1390           case 'W':
1391             state->result = NewRENode(state, REOP_NONALNUM);
1392             goto doSimple;
1393           /* IdentityEscape */
1394           default:
1395             state->result = NewRENode(state, REOP_FLAT);
1396             if (!state->result)
1397                 return JS_FALSE;
1398             state->result->u.flat.chr = c;
1399             state->result->u.flat.length = 1;
1400             state->result->kid = (void *) (state->cp - 1);
1401             state->progLength += 3;
1402             break;
1403         }
1404         break;
1405       case '[':
1406         state->result = NewRENode(state, REOP_CLASS);
1407         if (!state->result)
1408             return JS_FALSE;
1409         termStart = state->cp;
1410         state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1411         for (;;) {
1412             if (state->cp == state->cpend) {
1413                 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1414                                         JSMSG_UNTERM_CLASS, termStart);
1415
1416                 return JS_FALSE;
1417             }
1418             if (*state->cp == '\\') {
1419                 state->cp++;
1420                 if (state->cp != state->cpend)
1421                     state->cp++;
1422                 continue;
1423             }
1424             if (*state->cp == ']') {
1425                 state->result->u.ucclass.kidlen = state->cp - termStart;
1426                 break;
1427             }
1428             state->cp++;
1429         }
1430         for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1431             if (!state->classCache[i].start) {
1432                 state->classCache[i].start = termStart;
1433                 state->classCache[i].length = state->result->u.ucclass.kidlen;
1434                 state->classCache[i].index = state->classCount;
1435                 break;
1436             }
1437             if (state->classCache[i].length ==
1438                 state->result->u.ucclass.kidlen) {
1439                 for (n = 0; ; n++) {
1440                     if (n == state->classCache[i].length) {
1441                         state->result->u.ucclass.index
1442                             = state->classCache[i].index;
1443                         goto claim;
1444                     }
1445                     if (state->classCache[i].start[n] != termStart[n])
1446                         break;
1447                 }
1448             }
1449         }
1450         state->result->u.ucclass.index = state->classCount++;
1451
1452     claim:
1453         /*
1454          * Call CalculateBitmapSize now as we want any errors it finds
1455          * to be reported during the parse phase, not at execution.
1456          */
1457         if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1458             return JS_FALSE;
1459         /*
1460          * Update classBitmapsMem with number of bytes to hold bmsize bits,
1461          * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1462          * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1463          */
1464         n = (state->result->u.ucclass.bmsize >> 3) + 1;
1465         if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1466             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1467             return JS_FALSE;
1468         }
1469         state->classBitmapsMem += n;
1470         /* CLASS, <index> */
1471         state->progLength
1472             += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1473         break;
1474
1475       case '.':
1476         state->result = NewRENode(state, REOP_DOT);
1477         goto doSimple;
1478
1479       case '{':
1480       {
1481         const jschar *errp = state->cp--;
1482         intN err;
1483
1484         err = ParseMinMaxQuantifier(state, JS_TRUE);
1485         state->cp = errp;
1486
1487         if (err < 0)
1488             goto asFlat;
1489
1490         /* FALL THROUGH */
1491       }
1492       case '*':
1493       case '+':
1494       case '?':
1495         ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1496                                 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1497         return JS_FALSE;
1498       default:
1499 asFlat:
1500         state->result = NewRENode(state, REOP_FLAT);
1501         if (!state->result)
1502             return JS_FALSE;
1503         state->result->u.flat.chr = c;
1504         state->result->u.flat.length = 1;
1505         state->result->kid = (void *) (state->cp - 1);
1506         state->progLength += 3;
1507         break;
1508     }
1509     return ParseQuantifier(state);
1510 }
1511
1512 static JSBool
1513 ParseQuantifier(CompilerState *state)
1514 {
1515     RENode *term;
1516     term = state->result;
1517     if (state->cp < state->cpend) {
1518         switch (*state->cp) {
1519           case '+':
1520             state->result = NewRENode(state, REOP_QUANT);
1521             if (!state->result)
1522                 return JS_FALSE;
1523             state->result->u.range.min = 1;
1524             state->result->u.range.max = (uintN)-1;
1525             /* <PLUS>, <next> ... <ENDCHILD> */
1526             state->progLength += 4;
1527             goto quantifier;
1528           case '*':
1529             state->result = NewRENode(state, REOP_QUANT);
1530             if (!state->result)
1531                 return JS_FALSE;
1532             state->result->u.range.min = 0;
1533             state->result->u.range.max = (uintN)-1;
1534             /* <STAR>, <next> ... <ENDCHILD> */
1535             state->progLength += 4;
1536             goto quantifier;
1537           case '?':
1538             state->result = NewRENode(state, REOP_QUANT);
1539             if (!state->result)
1540                 return JS_FALSE;
1541             state->result->u.range.min = 0;
1542             state->result->u.range.max = 1;
1543             /* <OPT>, <next> ... <ENDCHILD> */
1544             state->progLength += 4;
1545             goto quantifier;
1546           case '{':       /* balance '}' */
1547           {
1548             intN err;
1549             const jschar *errp = state->cp;
1550
1551             err = ParseMinMaxQuantifier(state, JS_FALSE);
1552             if (err == 0)
1553                 goto quantifier;
1554             if (err == -1)
1555                 return JS_TRUE;
1556
1557             ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1558             return JS_FALSE;
1559           }
1560           default:;
1561         }
1562     }
1563     return JS_TRUE;
1564
1565 quantifier:
1566     if (state->treeDepth == TREE_DEPTH_MAX) {
1567         ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1568         return JS_FALSE;
1569     }
1570
1571     ++state->treeDepth;
1572     ++state->cp;
1573     state->result->kid = term;
1574     if (state->cp < state->cpend && *state->cp == '?') {
1575         ++state->cp;
1576         state->result->u.range.greedy = JS_FALSE;
1577     } else {
1578         state->result->u.range.greedy = JS_TRUE;
1579     }
1580     return JS_TRUE;
1581 }
1582
1583 static intN
1584 ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1585 {
1586     uintN min, max;
1587     jschar c;
1588     const jschar *errp = state->cp++;
1589
1590     c = *state->cp;
1591     if (JS7_ISDEC(c)) {
1592         ++state->cp;
1593         min = GetDecimalValue(c, 0xFFFF, NULL, state);
1594         c = *state->cp;
1595
1596         if (!ignoreValues && min == OVERFLOW_VALUE)
1597             return JSMSG_MIN_TOO_BIG;
1598
1599         if (c == ',') {
1600             c = *++state->cp;
1601             if (JS7_ISDEC(c)) {
1602                 ++state->cp;
1603                 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1604                 c = *state->cp;
1605                 if (!ignoreValues && max == OVERFLOW_VALUE)
1606                     return JSMSG_MAX_TOO_BIG;
1607                 if (!ignoreValues && min > max)
1608                     return JSMSG_OUT_OF_ORDER;
1609             } else {
1610                 max = (uintN)-1;
1611             }
1612         } else {
1613             max = min;
1614         }
1615         if (c == '}') {
1616             state->result = NewRENode(state, REOP_QUANT);
1617             if (!state->result)
1618                 return JSMSG_OUT_OF_MEMORY;
1619             state->result->u.range.min = min;
1620             state->result->u.range.max = max;
1621             /*
1622              * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1623              * where <max> is written as compact(max+1) to make
1624              * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1625              */
1626             state->progLength += (1 + GetCompactIndexWidth(min)
1627                                   + GetCompactIndexWidth(max + 1)
1628                                   +3);
1629             return 0;
1630         }
1631     }
1632
1633     state->cp = errp;
1634     return -1;
1635 }
1636
1637 static JSBool
1638 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1639 {
1640     ptrdiff_t offset = target - jump;
1641
1642     /* Check that target really points forward. */
1643     JS_ASSERT(offset >= 2);
1644     if ((size_t)offset > OFFSET_MAX)
1645         return JS_FALSE;
1646
1647     jump[0] = JUMP_OFFSET_HI(offset);
1648     jump[1] = JUMP_OFFSET_LO(offset);
1649     return JS_TRUE;
1650 }
1651
1652 /* Copy the charset data from a character class node to the charset list
1653  * in the regexp object. */
1654 static JS_ALWAYS_INLINE RECharSet *
1655 InitNodeCharSet(JSRegExp *re, RENode *node)
1656 {
1657     RECharSet *charSet = &re->classList[node->u.ucclass.index];
1658     charSet->converted = JS_FALSE;
1659     charSet->length = node->u.ucclass.bmsize;
1660     charSet->u.src.startIndex = node->u.ucclass.startIndex;
1661     charSet->u.src.length = node->u.ucclass.kidlen;
1662     charSet->sense = node->u.ucclass.sense;
1663     return charSet;
1664 }
1665
1666 /*
1667  * Generate bytecode for the tree rooted at t using an explicit stack instead
1668  * of recursion.
1669  */
1670 static jsbytecode *
1671 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
1672                jsbytecode *pc, RENode *t)
1673 {
1674     EmitStateStackEntry *emitStateSP, *emitStateStack;
1675     REOp op;
1676
1677     if (treeDepth == 0) {
1678         emitStateStack = NULL;
1679     } else {
1680         emitStateStack =
1681             (EmitStateStackEntry *)
1682             state->context->malloc(sizeof(EmitStateStackEntry) * treeDepth);
1683         if (!emitStateStack)
1684             return NULL;
1685     }
1686     emitStateSP = emitStateStack;
1687     op = t->op;
1688     JS_ASSERT(op < REOP_LIMIT);
1689
1690     for (;;) {
1691         *pc++ = op;
1692         switch (op) {
1693           case REOP_EMPTY:
1694             --pc;
1695             break;
1696
1697           case REOP_ALTPREREQ2:
1698           case REOP_ALTPREREQ:
1699             JS_ASSERT(emitStateSP);
1700             emitStateSP->altHead = pc - 1;
1701             emitStateSP->endTermFixup = pc;
1702             pc += OFFSET_LEN;
1703             SET_ARG(pc, t->u.altprereq.ch1);
1704             pc += ARG_LEN;
1705             SET_ARG(pc, t->u.altprereq.ch2);
1706             pc += ARG_LEN;
1707
1708             emitStateSP->nextAltFixup = pc;    /* offset to next alternate */
1709             pc += OFFSET_LEN;
1710
1711             emitStateSP->continueNode = t;
1712             emitStateSP->continueOp = REOP_JUMP;
1713             emitStateSP->jumpToJumpFlag = JS_FALSE;
1714             ++emitStateSP;
1715             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1716             t = (RENode *) t->kid;
1717             op = t->op;
1718             JS_ASSERT(op < REOP_LIMIT);
1719             continue;
1720
1721           case REOP_JUMP:
1722             emitStateSP->nextTermFixup = pc;    /* offset to following term */
1723             pc += OFFSET_LEN;
1724             if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
1725                 goto jump_too_big;
1726             emitStateSP->continueOp = REOP_ENDALT;
1727             ++emitStateSP;
1728             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1729             t = (RENode *) t->u.kid2;
1730             op = t->op;
1731             JS_ASSERT(op < REOP_LIMIT);
1732             continue;
1733
1734           case REOP_ENDALT:
1735             /*
1736              * If we already patched emitStateSP->nextTermFixup to jump to
1737              * a nearer jump, to avoid 16-bit immediate offset overflow, we
1738              * are done here.
1739              */
1740             if (emitStateSP->jumpToJumpFlag)
1741                 break;
1742
1743             /*
1744              * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1745              * REOP_ENDALT is executed only on successful match of the last
1746              * alternate in a group.
1747              */
1748             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1749                 goto jump_too_big;
1750             if (t->op != REOP_ALT) {
1751                 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
1752                     goto jump_too_big;
1753             }
1754
1755             /*
1756              * If the program is bigger than the REOP_JUMP offset range, then
1757              * we must check for alternates before this one that are part of
1758              * the same group, and fix up their jump offsets to target jumps
1759              * close enough to fit in a 16-bit unsigned offset immediate.
1760              */
1761             if ((size_t)(pc - re->program) > OFFSET_MAX &&
1762                 emitStateSP > emitStateStack) {
1763                 EmitStateStackEntry *esp, *esp2;
1764                 jsbytecode *alt, *jump;
1765                 ptrdiff_t span, header;
1766
1767                 esp2 = emitStateSP;
1768                 alt = esp2->altHead;
1769                 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
1770                     if (esp->continueOp == REOP_ENDALT &&
1771                         !esp->jumpToJumpFlag &&
1772                         esp->nextTermFixup + OFFSET_LEN == alt &&
1773                         (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
1774                                        ? esp->endTermFixup
1775                                        : esp->nextTermFixup)) > OFFSET_MAX) {
1776                         alt = esp->altHead;
1777                         jump = esp->nextTermFixup;
1778
1779                         /*
1780                          * The span must be 1 less than the distance from
1781                          * jump offset to jump offset, so we actually jump
1782                          * to a REOP_JUMP bytecode, not to its offset!
1783                          */
1784                         for (;;) {
1785                             JS_ASSERT(jump < esp2->nextTermFixup);
1786                             span = esp2->nextTermFixup - jump - 1;
1787                             if ((size_t)span <= OFFSET_MAX)
1788                                 break;
1789                             do {
1790                                 if (--esp2 == esp)
1791                                     goto jump_too_big;
1792                             } while (esp2->continueOp != REOP_ENDALT);
1793                         }
1794
1795                         jump[0] = JUMP_OFFSET_HI(span);
1796                         jump[1] = JUMP_OFFSET_LO(span);
1797
1798                         if (esp->continueNode->op != REOP_ALT) {
1799                             /*
1800                              * We must patch the offset at esp->endTermFixup
1801                              * as well, for the REOP_ALTPREREQ{,2} opcodes.
1802                              * If we're unlucky and endTermFixup is more than
1803                              * OFFSET_MAX bytes from its target, we cheat by
1804                              * jumping 6 bytes to the jump whose offset is at
1805                              * esp->nextTermFixup, which has the same target.
1806                              */
1807                             jump = esp->endTermFixup;
1808                             header = esp->nextTermFixup - jump;
1809                             span += header;
1810                             if ((size_t)span > OFFSET_MAX)
1811                                 span = header;
1812
1813                             jump[0] = JUMP_OFFSET_HI(span);
1814                             jump[1] = JUMP_OFFSET_LO(span);
1815                         }
1816
1817                         esp->jumpToJumpFlag = JS_TRUE;
1818                     }
1819                 }
1820             }
1821             break;
1822
1823           case REOP_ALT:
1824             JS_ASSERT(emitStateSP);
1825             emitStateSP->altHead = pc - 1;
1826             emitStateSP->nextAltFixup = pc;     /* offset to next alternate */
1827             pc += OFFSET_LEN;
1828             emitStateSP->continueNode = t;
1829             emitStateSP->continueOp = REOP_JUMP;
1830             emitStateSP->jumpToJumpFlag = JS_FALSE;
1831             ++emitStateSP;
1832             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1833             t = (RENode *) t->kid;
1834             op = t->op;
1835             JS_ASSERT(op < REOP_LIMIT);
1836             continue;
1837
1838           case REOP_FLAT:
1839             /*
1840              * Coalesce FLATs if possible and if it would not increase bytecode
1841              * beyond preallocated limit. The latter happens only when bytecode
1842              * size for coalesced string with offset p and length 2 exceeds 6
1843              * bytes preallocated for 2 single char nodes, i.e. when
1844              * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1845              * GetCompactIndexWidth(p) > 4.
1846              * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1847              * nodes strictly decreases bytecode size, the check has to be
1848              * done only for the first coalescing.
1849              */
1850             if (t->kid &&
1851                 GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
1852             {
1853                 while (t->next &&
1854                        t->next->op == REOP_FLAT &&
1855                        (jschar*)t->kid + t->u.flat.length ==
1856                        (jschar*)t->next->kid) {
1857                     t->u.flat.length += t->next->u.flat.length;
1858                     t->next = t->next->next;
1859                 }
1860             }
1861             if (t->kid && t->u.flat.length > 1) {
1862                 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
1863                 pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
1864                 pc = WriteCompactIndex(pc, t->u.flat.length);
1865             } else if (t->u.flat.chr < 256) {
1866                 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
1867                 *pc++ = (jsbytecode) t->u.flat.chr;
1868             } else {
1869                 pc[-1] = (state->flags & JSREG_FOLD)
1870                          ? REOP_UCFLAT1i
1871                          : REOP_UCFLAT1;
1872                 SET_ARG(pc, t->u.flat.chr);
1873                 pc += ARG_LEN;
1874             }
1875             break;
1876
1877           case REOP_LPAREN:
1878             JS_ASSERT(emitStateSP);
1879             pc = WriteCompactIndex(pc, t->u.parenIndex);
1880             emitStateSP->continueNode = t;
1881             emitStateSP->continueOp = REOP_RPAREN;
1882             ++emitStateSP;
1883             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1884             t = (RENode *) t->kid;
1885             op = t->op;
1886             continue;
1887
1888           case REOP_RPAREN:
1889             pc = WriteCompactIndex(pc, t->u.parenIndex);
1890             break;
1891
1892           case REOP_BACKREF:
1893             pc = WriteCompactIndex(pc, t->u.parenIndex);
1894             break;
1895
1896           case REOP_ASSERT:
1897             JS_ASSERT(emitStateSP);
1898             emitStateSP->nextTermFixup = pc;
1899             pc += OFFSET_LEN;
1900             emitStateSP->continueNode = t;
1901             emitStateSP->continueOp = REOP_ASSERTTEST;
1902             ++emitStateSP;
1903             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1904             t = (RENode *) t->kid;
1905             op = t->op;
1906             continue;
1907
1908           case REOP_ASSERTTEST:
1909           case REOP_ASSERTNOTTEST:
1910             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1911                 goto jump_too_big;
1912             break;
1913
1914           case REOP_ASSERT_NOT:
1915             JS_ASSERT(emitStateSP);
1916             emitStateSP->nextTermFixup = pc;
1917             pc += OFFSET_LEN;
1918             emitStateSP->continueNode = t;
1919             emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1920             ++emitStateSP;
1921             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1922             t = (RENode *) t->kid;
1923             op = t->op;
1924             continue;
1925
1926           case REOP_QUANT:
1927             JS_ASSERT(emitStateSP);
1928             if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
1929                 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1930             } else if (t->u.range.min == 0 && t->u.range.max == 1) {
1931                 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1932             } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
1933                 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1934             } else {
1935                 if (!t->u.range.greedy)
1936                     pc[-1] = REOP_MINIMALQUANT;
1937                 pc = WriteCompactIndex(pc, t->u.range.min);
1938                 /*
1939                  * Write max + 1 to avoid using size_t(max) + 1 bytes
1940                  * for (uintN)-1 sentinel.
1941                  */
1942                 pc = WriteCompactIndex(pc, t->u.range.max + 1);
1943             }
1944             emitStateSP->nextTermFixup = pc;
1945             pc += OFFSET_LEN;
1946             emitStateSP->continueNode = t;
1947             emitStateSP->continueOp = REOP_ENDCHILD;
1948             ++emitStateSP;
1949             JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1950             t = (RENode *) t->kid;
1951             op = t->op;
1952             continue;
1953
1954           case REOP_ENDCHILD:
1955             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1956                 goto jump_too_big;
1957             break;
1958
1959           case REOP_CLASS:
1960             if (!t->u.ucclass.sense)
1961                 pc[-1] = REOP_NCLASS;
1962             pc = WriteCompactIndex(pc, t->u.ucclass.index);
1963             InitNodeCharSet(re, t);
1964             break;
1965
1966           default:
1967             break;
1968         }
1969
1970         t = t->next;
1971         if (t) {
1972             op = t->op;
1973         } else {
1974             if (emitStateSP == emitStateStack)
1975                 break;
1976             --emitStateSP;
1977             t = emitStateSP->continueNode;
1978             op = (REOp) emitStateSP->continueOp;
1979         }
1980     }
1981
1982   cleanup:
1983     if (emitStateStack)
1984         state->context->free(emitStateStack);
1985     return pc;
1986
1987   jump_too_big:
1988     ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1989     pc = NULL;
1990     goto cleanup;
1991 }
1992
1993 static JSBool
1994 CompileRegExpToAST(JSContext* cx, TokenStream* ts,
1995                    JSString* str, uintN flags, CompilerState& state)
1996 {
1997     uintN i;
1998     size_t len;
1999
2000     len = str->length();
2001
2002     state.context = cx;
2003     state.tokenStream = ts;
2004     state.cp = js_UndependString(cx, str);
2005     if (!state.cp)
2006         return JS_FALSE;
2007     state.cpbegin = state.cp;
2008     state.cpend = state.cp + len;
2009     state.flags = uint16(flags);
2010     state.parenCount = 0;
2011     state.classCount = 0;
2012     state.progLength = 0;
2013     state.treeDepth = 0;
2014     state.classBitmapsMem = 0;
2015     for (i = 0; i < CLASS_CACHE_SIZE; i++)
2016         state.classCache[i].start = NULL;
2017
2018     if (len != 0 && (flags & JSREG_FLAT)) {
2019         state.result = NewRENode(&state, REOP_FLAT);
2020         if (!state.result)
2021             return JS_FALSE;
2022         state.result->u.flat.chr = *state.cpbegin;
2023         state.result->u.flat.length = len;
2024         state.result->kid = (void *) state.cpbegin;
2025         /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
2026         state.progLength += 1 + GetCompactIndexWidth(0)
2027                           + GetCompactIndexWidth(len);
2028         return JS_TRUE;
2029     }
2030
2031     return ParseRegExp(&state);
2032 }
2033
2034 #ifdef JS_TRACER
2035 typedef js::Vector<LIns *, 4, js::ContextAllocPolicy> LInsList;
2036
2037 namespace js {
2038
2039 struct REFragment : public nanojit::Fragment
2040 {
2041     REFragment(const void* _ip verbose_only(, uint32_t profFragID))
2042       : nanojit::Fragment(ip verbose_only(, profFragID))
2043     {}
2044 };
2045
2046 } /* namespace js */
2047
2048 /* Return the cached fragment for the given regexp, or create one. */
2049 static Fragment*
2050 LookupNativeRegExp(JSContext* cx, uint16 re_flags,
2051                    const jschar* re_chars, size_t re_length)
2052 {
2053     TraceMonitor *tm = &JS_TRACE_MONITOR(cx);
2054     VMAllocator &alloc = *tm->dataAlloc;
2055     REHashMap &table = *tm->reFragments;
2056
2057     REHashKey k(re_length, re_flags, re_chars);
2058     REFragment *frag = table.get(k);
2059
2060     if (!frag) {
2061         verbose_only(
2062         uint32_t profFragID = (LogController.lcbits & LC_FragProfile)
2063                               ? (++(tm->lastFragID)) : 0;
2064         )
2065         frag = new (alloc) REFragment(0 verbose_only(, profFragID));
2066         /*
2067          * Copy the re_chars portion of the hash key into the Allocator, so
2068          * its lifecycle is disconnected from the lifecycle of the
2069          * underlying regexp.
2070          */
2071         k.re_chars = (const jschar*) new (alloc) jschar[re_length];
2072         memcpy((void*) k.re_chars, re_chars, re_length * sizeof(jschar));
2073         table.put(k, frag);
2074     }
2075     return frag;
2076 }
2077
2078 static JSBool
2079 ProcessCharSet(JSContext *cx, JSRegExp *re, RECharSet *charSet);
2080
2081 /* Utilities for the RegExpNativeCompiler */
2082
2083 namespace {
2084   /*
2085    * An efficient way to simultaneously statically guard that the sizeof(bool) is a
2086    * small power of 2 and take its log2.
2087    */
2088   template <int> struct StaticLog2 {};
2089   template <> struct StaticLog2<1> { static const int result = 0; };
2090   template <> struct StaticLog2<2> { static const int result = 1; };
2091   template <> struct StaticLog2<4> { static const int result = 2; };
2092   template <> struct StaticLog2<8> { static const int result = 3; };
2093 }
2094
2095 /*
2096  * This table allows efficient testing for the ASCII portion of \s during a
2097  * trace. ECMA-262 15.10.2.12 defines the following characters below 128 to be
2098  * whitespace: 0x9 (0), 0xA (10), 0xB (11), 0xC (12), 0xD (13), 0x20 (32). The
2099  * index must be <= 32.
2100  */
2101 static const bool js_ws[] = {
2102 /*       0      1      2      3      4      5      5      7      8      9      */
2103 /*  0 */ false, false, false, false, false, false, false, false, false, true,
2104 /*  1 */ true,  true,  true,  true,  false, false, false, false, false, false,
2105 /*  2 */ false, false, false, false, false, false, false, false, false, false,
2106 /*  3 */ false, false, true
2107 };
2108
2109 /* Sets of characters are described in terms of individuals and classes. */
2110 class CharSet {
2111   public:
2112     CharSet() : charEnd(charBuf), classes(0) {}
2113
2114     static const uintN sBufSize = 8;
2115
2116     bool full() { return charEnd == charBuf + sBufSize; }
2117
2118     /* Add a single char to the set. */
2119     bool addChar(jschar c)
2120     {
2121         if (full())
2122             return false;
2123         *charEnd++ = c;
2124         return true;
2125     }
2126
2127     enum Class {
2128         LineTerms  = 1 << 0,  /* Line Terminators (E262 7.3) */
2129         OtherSpace = 1 << 1,  /* \s (E262 15.10.2.12) - LineTerms */
2130         Digit      = 1 << 2,  /* \d (E262 15.10.2.12) */
2131         OtherAlnum = 1 << 3,  /* \w (E262 15,10.2.12) - Digit */
2132         Other      = 1 << 4,  /* all other characters */
2133         All        = LineTerms | OtherSpace | Digit | OtherAlnum | Other,
2134
2135         Space = LineTerms | OtherSpace,
2136         AlNum = Digit | OtherAlnum,
2137         Dot   = All & ~LineTerms
2138     };
2139
2140     /* Add a set of chars to the set. */
2141     void addClass(Class c) { classes |= c; }
2142
2143     /* Return whether two sets of chars are disjoint. */
2144     bool disjoint(const CharSet &) const;
2145
2146   private:
2147     static bool disjoint(const jschar *beg, const jschar *end, uintN classes);
2148
2149     mutable jschar charBuf[sBufSize];
2150     jschar *charEnd;
2151     uintN classes;
2152 };
2153
2154 /* Appease the type checker. */
2155 static inline CharSet::Class
2156 operator|(CharSet::Class c1, CharSet::Class c2) {
2157     return (CharSet::Class)(((int)c1) | ((int)c2));
2158 }
2159 static inline CharSet::Class
2160 operator~(CharSet::Class c) {
2161     return (CharSet::Class)(~(int)c);
2162 }
2163
2164 /*
2165  * Return whether the characters in the range [beg, end) fall within any of the
2166  * classes with a bit set in 'classes'.
2167  */
2168 bool
2169 CharSet::disjoint(const jschar *beg, const jschar *end, uintN classes)
2170 {
2171     for (const jschar *p = beg; p != end; ++p) {
2172         if (JS7_ISDEC(*p)) {
2173             if (classes & Digit)
2174                 return false;
2175         } else if (JS_ISWORD(*p)) {
2176             if (classes & OtherAlnum)
2177                 return false;
2178         } else if (RE_IS_LINE_TERM(*p)) {
2179             if (classes & LineTerms)
2180                 return false;
2181         } else if (JS_ISSPACE(*p)) {
2182             if (classes & OtherSpace)
2183                 return false;
2184         } else {
2185             if (classes & Other)
2186                 return false;
2187         }
2188     }
2189     return true;
2190 }
2191
2192 /*
2193  * Predicate version of the STL's set_intersection. Assumes both ranges are
2194  * sorted and thus runs in linear time.
2195  *
2196  * FIXME: This is a reusable algorithm, perhaps it should be put somewhere.
2197  */
2198 template <class InputIterator1, class InputIterator2>
2199 bool
2200 set_disjoint(InputIterator1 p1, InputIterator1 end1,
2201              InputIterator2 p2, InputIterator2 end2)
2202 {
2203     if (p1 == end1 || p2 == end2)
2204         return true;
2205     while (*p1 != *p2) {
2206         if (*p1 < *p2) {
2207             ++p1;
2208             if (p1 == end1)
2209                 return true;
2210         } else if (*p2 < *p1) {
2211             ++p2;
2212             if (p2 == end2)
2213                 return true;
2214         }
2215     }
2216     return false;
2217 }
2218
2219 static JSBool
2220 CharCmp(void *arg, const void *a, const void *b, int *result)
2221 {
2222     jschar ca = *(jschar *)a, cb = *(jschar *)b;
2223     *result = ca - cb;
2224     return JS_TRUE;
2225 }
2226
2227 bool
2228 CharSet::disjoint(const CharSet &other) const
2229 {
2230     /* Check overlap between classes. */
2231     if (classes & other.classes)
2232         return false;
2233
2234     /*
2235      * Check char-class overlap. Compare this->charBuf with other.classes and
2236      * vice versa with a loop.
2237      */
2238     if (!disjoint(this->charBuf, this->charEnd, other.classes) ||
2239         !disjoint(other.charBuf, other.charEnd, this->classes))
2240         return false;
2241
2242     /* Check char-char overlap. */
2243     jschar tmp[CharSet::sBufSize];
2244     js_MergeSort(charBuf, charEnd - charBuf, sizeof(jschar),
2245                  CharCmp, 0, tmp);
2246     js_MergeSort(other.charBuf, other.charEnd - other.charBuf, sizeof(jschar),
2247                  CharCmp, 0, tmp);
2248     return set_disjoint(charBuf, charEnd, other.charBuf, other.charEnd);
2249 }
2250
2251 /*
2252  * Return true if the given subexpression may match the empty string. The
2253  * conservative answer is |true|. If |next| is true, then the subexpression is
2254  * considered to be |node| followed by the rest of |node->next|. Otherwise, the
2255  * subexpression is considered to be |node| by itself.
2256  */
2257 static bool
2258 mayMatchEmpty(RENode *node, bool next = true)
2259 {
2260     if (!node)
2261         return true;
2262     switch (node->op) {
2263       case REOP_EMPTY:  return true;
2264       case REOP_FLAT:   return false;
2265       case REOP_CLASS:  return false;
2266       case REOP_ALNUM:  return false;
2267       case REOP_ALT:    return (mayMatchEmpty((RENode *)node->kid) ||
2268                                 mayMatchEmpty((RENode *)node->u.kid2)) &&
2269                                (!next || mayMatchEmpty(node->next));
2270       case REOP_QUANT:  return (node->u.range.min == 0 ||
2271                                 mayMatchEmpty((RENode *)node->kid)) &&
2272                                (!next || mayMatchEmpty(node->next));
2273       default:          return true;
2274     }
2275 }
2276
2277 /*
2278  * Enumerate the set of characters that may be consumed next by the given
2279  * subexpression in isolation. Return whether the enumeration was successful.
2280  */
2281 static bool
2282 enumerateNextChars(JSContext *cx, RENode *node, CharSet &set)
2283 {
2284     JS_CHECK_RECURSION(cx, return JS_FALSE);
2285
2286     if (!node)
2287         return true;
2288
2289     switch (node->op) {
2290       /* Record as bitflags. */
2291       case REOP_DOT:       set.addClass(CharSet::Dot);     return true;
2292       case REOP_DIGIT:     set.addClass(CharSet::Digit);   return true;
2293       case REOP_NONDIGIT:  set.addClass(~CharSet::Digit);  return true;
2294       case REOP_ALNUM:     set.addClass(CharSet::AlNum);   return true;
2295       case REOP_NONALNUM:  set.addClass(~CharSet::AlNum);  return true;
2296       case REOP_SPACE:     set.addClass(CharSet::Space);   return true;
2297       case REOP_NONSPACE:  set.addClass(~CharSet::Space);  return true;
2298
2299       /* Record as individual characters. */
2300       case REOP_FLAT:
2301         return set.addChar(node->u.flat.chr);
2302
2303       /* Control structures. */
2304       case REOP_EMPTY:
2305         return true;
2306       case REOP_ALT:
2307       case REOP_ALTPREREQ:
2308         return enumerateNextChars(cx, (RENode *)node->kid, set) &&
2309                enumerateNextChars(cx, (RENode *)node->u.kid2, set) &&
2310                (!mayMatchEmpty(node, false) ||
2311                 enumerateNextChars(cx, (RENode *)node->next, set));
2312       case REOP_QUANT:
2313         return enumerateNextChars(cx, (RENode *)node->kid, set) &&
2314                (!mayMatchEmpty(node, false) ||
2315                 enumerateNextChars(cx, (RENode *)node->next, set));
2316
2317       /* Arbitrary character classes and oddities. */
2318       default:
2319         return false;
2320     }
2321 }
2322
2323 class RegExpNativeCompiler {
2324  private:
2325     VMAllocator&     tempAlloc;
2326     JSContext*       cx;
2327     JSRegExp*        re;
2328     CompilerState*   cs;            /* RegExp to compile */
2329     Fragment*        fragment;
2330     LirWriter*       lir;
2331 #ifdef DEBUG
2332     LirWriter*       validate_writer;
2333 #endif
2334 #ifdef NJ_VERBOSE
2335     LirWriter*       verbose_filter;
2336 #endif
2337     LirBufWriter*    lirBufWriter;  /* for skip */
2338
2339     LIns*            state;
2340     LIns*            start;
2341     LIns*            cpend;
2342
2343     LirBuffer* const lirbuf;
2344
2345     bool outOfMemory() {
2346         return tempAlloc.outOfMemory() || JS_TRACE_MONITOR(cx).dataAlloc->outOfMemory();
2347     }
2348
2349     JSBool isCaseInsensitive() const { return (cs->flags & JSREG_FOLD) != 0; }
2350
2351     void targetCurrentPoint(LIns *ins)
2352     {
2353         ins->setTarget(lir->ins0(LIR_label));
2354     }
2355
2356     void targetCurrentPoint(LInsList &fails)
2357     {
2358         LIns *fail = lir->ins0(LIR_label);
2359         for (size_t i = 0; i < fails.length(); ++i) {
2360             fails[i]->setTarget(fail);
2361         }
2362         fails.clear();
2363     }
2364
2365     /*
2366      * These functions return the new position after their match operation,
2367      * or NULL if there was an error.
2368      */
2369     LIns* compileEmpty(RENode* node, LIns* pos, LInsList& fails)
2370     {
2371         return pos;
2372     }
2373
2374 #if defined(AVMPLUS_ARM) || defined(AVMPLUS_SPARC)
2375 /* We can't do this on ARM or SPARC, since it relies on doing a 32-bit load from
2376  * a pointer which is only 2-byte aligned.
2377  */
2378 #undef USE_DOUBLE_CHAR_MATCH
2379 #else
2380 #define USE_DOUBLE_CHAR_MATCH
2381 #endif
2382
2383     LIns* compileFlatSingleChar(jschar ch, LIns* pos, LInsList& fails)
2384     {
2385         LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_ltp, pos, cpend), 0);
2386         if (!fails.append(to_fail))
2387             return NULL;
2388         LIns* text_ch = lir->insLoad(LIR_ldus2ui, pos, 0, ACC_READONLY);
2389
2390         // Extra characters that need to be compared against when doing folding.
2391         struct extra {
2392             jschar ch;
2393             LIns   *match;
2394         };
2395         extra extras[5];
2396         int   nextras = 0;
2397
2398         if (cs->flags & JSREG_FOLD) {
2399             ch = JS_TOUPPER(ch);
2400             jschar lch = inverse_upcase(ch);
2401
2402             if (ch != lch) {
2403                 if (L'A' <= ch && ch <= L'Z') {
2404                     // Fast conversion of text character to lower case by OR-ing with 32.
2405                     text_ch = lir->ins2(LIR_ori, text_ch, lir->insImmI(32));
2406                     // These ASCII letters have 2 lower-case forms. We put the ASCII one in
2407                     // |extras| so it is tested first, because we expect that to be the common
2408                     // case. Note that the code points of the non-ASCII forms both have the
2409                     // 32 bit set, so it is OK to compare against the OR-32-converted text char.
2410                     ch = lch;
2411                     if (ch == L'i') {
2412                         extras[nextras++].ch = ch;
2413                         ch = 0x131;
2414                     } else if (ch == L's') {
2415                         extras[nextras++].ch = ch;
2416                         ch = 0x17f;
2417                     }
2418                     goto gen;
2419                 } else if (0x01c4 <= ch && ch <= 0x1e60) {
2420                     // The following group of conditionals handles characters that have 1 or 2
2421                     // lower-case forms in addition to JS_TOLOWER(ch).
2422                     if (ch <= 0x1f1) {                 // DZ,LJ,NJ
2423                         if (ch == 0x01c4) {
2424                             extras[nextras++].ch = 0x01c5;
2425                         } else if (ch == 0x01c7) {
2426                             extras[nextras++].ch = 0x01c8;
2427                         } else if (ch == 0x01ca) {
2428                             extras[nextras++].ch = 0x01cb;
2429                         } else if (ch == 0x01f1) {
2430                             extras[nextras++].ch = 0x01f2;
2431                         }
2432                     } else if (ch < 0x0392) {          // no extra lower-case forms in this range
2433                     } else if (ch <= 0x03a6) {         // Greek
2434                         if (ch == 0x0392) {
2435                             extras[nextras++].ch = 0x03d0;
2436                         } else if (ch == 0x0395) {
2437                             extras[nextras++].ch = 0x03f5;
2438                         } else if (ch == 0x0398) {
2439                             extras[nextras++].ch = 0x03d1;
2440                         } else if (ch == 0x0399) {
2441                             extras[nextras++].ch = 0x0345;
2442                             extras[nextras++].ch = 0x1fbe;
2443                         } else if (ch == 0x039a) {
2444                             extras[nextras++].ch = 0x03f0;
2445                         } else if (ch == 0x039c) {
2446                             extras[nextras++].ch = 0xb5;
2447                         } else if (ch == 0x03a0) {
2448                             extras[nextras++].ch = 0x03d6;
2449                         } else if (ch == 0x03a1) {
2450                             extras[nextras++].ch = 0x03f1;
2451                         } else if (ch == 0x03a3) {
2452                             extras[nextras++].ch = 0x03c2;
2453                         } else if (ch == 0x03a6) {
2454                             extras[nextras++].ch = 0x03d5;
2455                         }
2456                     } else if (ch == 0x1e60) {         // S with dot above
2457                         extras[nextras++].ch = 0x1e9b;
2458                     }
2459                 }
2460
2461                 extras[nextras++].ch = lch;
2462             }
2463         }
2464
2465     gen:
2466         for (int i = 0; i < nextras; ++i) {
2467             LIns *test = lir->ins2(LIR_eqi, text_ch, lir->insImmI(extras[i].ch));
2468             LIns *branch = lir->insBranch(LIR_jt, test, 0);
2469             extras[i].match = branch;
2470         }
2471
2472         if (!fails.append(lir->insBranch(LIR_jf, lir->ins2(LIR_eqi, text_ch, lir->insImmI(ch)), 0)))
2473             return NULL;
2474
2475         for (int i = 0; i < nextras; ++i)
2476             targetCurrentPoint(extras[i].match);
2477         return lir->ins2(LIR_addp, pos, lir->insImmWord(2));
2478     }
2479
2480     JS_INLINE bool hasCases(jschar ch)
2481     {
2482         return JS_TOLOWER(ch) != JS_TOUPPER(ch);
2483     }
2484
2485     LIns* compileFlatDoubleChar(jschar ch1, jschar ch2, LIns* pos, LInsList& fails)
2486     {
2487 #ifdef IS_BIG_ENDIAN
2488         uint32 word = (ch1 << 16) | ch2;
2489 #else
2490         uint32 word = (ch2 << 16) | ch1;
2491 #endif
2492         /*
2493          * Fast case-insensitive test for ASCII letters: convert text
2494          * char to lower case by bit-or-ing in 32 and compare.
2495          */
2496         JSBool useFastCI = JS_FALSE;
2497         union { jschar c[2]; uint32 i; } mask;
2498         if (cs->flags & JSREG_FOLD) {
2499             jschar uch1 = JS_TOUPPER(ch1);
2500             jschar uch2 = JS_TOUPPER(ch2);
2501             JSBool mask1 = (L'A' <= uch1 && uch1 <= L'Z' && uch1 != L'I' && uch1 != L'S');
2502             JSBool mask2 = (L'A' <= uch2 && uch2 <= L'Z' && uch2 != L'I' && uch2 != L'S');
2503             if ((!mask1 && hasCases(ch1)) || (!mask2 && hasCases(ch2))) {
2504                 pos = compileFlatSingleChar(ch1, pos, fails);
2505                 if (!pos) return NULL;
2506                 return compileFlatSingleChar(ch2, pos, fails);
2507             }
2508
2509             mask.c[0] = mask1 ? 0x0020 : 0x0;
2510             mask.c[1] = mask2 ? 0x0020 : 0x0;
2511
2512             if (mask.i) {
2513                 word |= mask.i;
2514                 useFastCI = JS_TRUE;
2515             }
2516         }
2517
2518         LIns* to_fail = lir->insBranch(LIR_jf,
2519                                        lir->ins2(LIR_ltp,
2520                                                  pos,
2521                                                  lir->ins2(LIR_addp,
2522                                                            cpend,
2523                                                            lir->insImmWord(-2))),
2524                                        0);
2525         if (!fails.append(to_fail))
2526             return NULL;
2527         LIns* text_word = lir->insLoad(LIR_ldi, pos, 0, ACC_OTHER);
2528         LIns* comp_word = useFastCI ?
2529             lir->ins2(LIR_ori, text_word, lir->insImmI(mask.i)) :
2530             text_word;
2531         if (!fails.append(lir->insBranch(LIR_jf, lir->ins2(LIR_eqi, comp_word, lir->insImmI(word)), 0)))
2532             return NULL;
2533
2534         return lir->ins2(LIR_addp, pos, lir->insImmWord(4));
2535     }
2536
2537     LIns* compileFlat(RENode *&node, LIns* pos, LInsList& fails)
2538     {
2539 #ifdef USE_DOUBLE_CHAR_MATCH
2540         if (node->u.flat.length == 1) {
2541             if (node->next && node->next->op == REOP_FLAT &&
2542                 node->next->u.flat.length == 1) {
2543                 pos = compileFlatDoubleChar(node->u.flat.chr,
2544                                             node->next->u.flat.chr,
2545                                             pos, fails);
2546                 node = node->next;
2547             } else {
2548                 pos = compileFlatSingleChar(node->u.flat.chr, pos, fails);
2549             }
2550             return pos;
2551         } else {
2552             size_t i;
2553             for (i = 0; i < node->u.flat.length - 1; i += 2) {
2554                 if (outOfMemory())
2555                     return 0;
2556                 pos = compileFlatDoubleChar(((jschar*) node->kid)[i],
2557                                             ((jschar*) node->kid)[i+1],
2558                                             pos, fails);
2559                 if (!pos)
2560                     return 0;
2561             }
2562             JS_ASSERT(pos != 0);
2563             if (i == node->u.flat.length - 1)
2564                 pos = compileFlatSingleChar(((jschar*) node->kid)[i], pos, fails);
2565             return pos;
2566         }
2567 #else
2568         if (node->u.flat.length == 1) {
2569             return compileFlatSingleChar(node->u.flat.chr, pos, fails);
2570         } else {
2571             for (size_t i = 0; i < node->u.flat.length; i++) {
2572                 if (outOfMemory())
2573                     return 0;
2574                 pos = compileFlatSingleChar(((jschar*) node->kid)[i], pos, fails);
2575                 if (!pos)
2576                     return 0;
2577             }
2578             return pos;
2579         }
2580 #endif
2581     }
2582
2583     LIns* compileClass(RENode* node, LIns* pos, LInsList& fails)
2584     {
2585         if (!node->u.ucclass.sense)
2586             return JS_FALSE;
2587         /*
2588          * If we share generated native code, we need to make a copy
2589          * of the bitmap because the original regexp's copy is destroyed when
2590          * that regexp is.
2591          */
2592         RECharSet *charSet = &re->classList[node->u.ucclass.index];
2593         size_t bitmapLen = (charSet->length >> 3) + 1;
2594         /* Arbitrary size limit on bitmap. */
2595         if (bitmapLen > 1024)
2596             return NULL;
2597         Allocator &alloc = *JS_TRACE_MONITOR(cx).dataAlloc;
2598         /* The following line allocates charSet.u.bits if successful. */
2599         if (!charSet->converted && !ProcessCharSet(cx, re, charSet))
2600             return NULL;
2601         void* bitmapData = alloc.alloc(bitmapLen);
2602         if (outOfMemory())
2603             return NULL;
2604         memcpy(bitmapData, charSet->u.bits, bitmapLen);
2605
2606         LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_ltp, pos, cpend), 0);
2607         if (!fails.append(to_fail))
2608             return NULL;
2609         LIns* text_ch = lir->insLoad(LIR_ldus2ui, pos, 0, ACC_READONLY);
2610         if (!fails.append(lir->insBranch(LIR_jf,
2611                                          lir->ins2(LIR_lei, text_ch, lir->insImmI(charSet->length)),
2612                                          0))) {
2613             return NULL;
2614         }
2615         LIns* byteIndex = lir->insI2P(lir->ins2(LIR_rshi, text_ch, lir->insImmI(3)));
2616         LIns* bitmap = lir->insImmP(bitmapData);
2617         LIns* byte = lir->insLoad(LIR_lduc2ui, lir->ins2(LIR_addp, bitmap, byteIndex), (int) 0,
2618                                   ACC_READONLY);
2619         LIns* bitMask = lir->ins2(LIR_lshi, lir->insImmI(1),
2620                                lir->ins2(LIR_andi, text_ch, lir->insImmI(0x7)));
2621         LIns* test = lir->ins2(LIR_eqi, lir->ins2(LIR_andi, byte, bitMask), lir->insImmI(0));
2622
2623         LIns* to_next = lir->insBranch(LIR_jt, test, 0);
2624         if (!fails.append(to_next))
2625             return NULL;
2626         return lir->ins2(LIR_addp, pos, lir->insImmWord(2));
2627     }
2628
2629     /* Factor out common code to index js_alnum. */
2630     LIns *compileTableRead(LIns *chr, const bool *tbl)
2631     {
2632         if (sizeof(bool) != 1) {
2633             LIns *sizeLog2 = lir->insImmI(StaticLog2<sizeof(bool)>::result);
2634             chr = lir->ins2(LIR_lshi, chr, sizeLog2);
2635         }
2636         LIns *addr = lir->ins2(LIR_addp, lir->insImmP(tbl), lir->insUI2P(chr));
2637         return lir->insLoad(LIR_lduc2ui, addr, 0, ACC_READONLY);
2638     }
2639
2640     /* Compile a builtin character class. */
2641     LIns *compileBuiltinClass(RENode *node, LIns *pos, LInsList &fails)
2642     {
2643         /* All the builtins checked below consume one character. */
2644         if (!fails.append(lir->insBranch(LIR_jf, lir->ins2(LIR_ltp, pos, cpend), 0)))
2645             return NULL;
2646         LIns *chr = lir->insLoad(LIR_ldus2ui, pos, 0, ACC_READONLY);
2647
2648         switch (node->op) {
2649           case REOP_DOT:
2650           {
2651             /* Accept any character except those in ECMA-262 15.10.2.8. */
2652             LIns *eq1 = lir->ins2(LIR_eqi, chr, lir->insImmI('\n'));
2653             if (!fails.append(lir->insBranch(LIR_jt, eq1, NULL)))
2654                 return NULL;
2655             LIns *eq2 = lir->ins2(LIR_eqi, chr, lir->insImmI('\r'));
2656             if (!fails.append(lir->insBranch(LIR_jt, eq2, NULL)))
2657                 return NULL;
2658             LIns *eq3 = lir->ins2(LIR_eqi, chr, lir->insImmI(LINE_SEPARATOR));
2659             if (!fails.append(lir->insBranch(LIR_jt, eq3, NULL)))
2660                 return NULL;
2661             LIns *eq4 = lir->ins2(LIR_eqi, chr, lir->insImmI(PARA_SEPARATOR));
2662             if (!fails.append(lir->insBranch(LIR_jt, eq4, NULL)))
2663                 return NULL;
2664             break;
2665           }
2666           case REOP_DIGIT:
2667           {
2668             LIns *ge = lir->ins2(LIR_gei, chr, lir->insImmI('0'));
2669             if (!fails.append(lir->insBranch(LIR_jf, ge, NULL)))
2670                 return NULL;
2671             LIns *le = lir->ins2(LIR_lei, chr, lir->insImmI('9'));
2672             if (!fails.append(lir->insBranch(LIR_jf, le, NULL)))
2673                 return NULL;
2674             break;
2675           }
2676           case REOP_NONDIGIT:
2677           {
2678             /* Use 'and' to give a predictable branch for success path. */
2679             LIns *ge = lir->ins2(LIR_gei, chr, lir->insImmI('0'));
2680             LIns *le = lir->ins2(LIR_lei, chr, lir->insImmI('9'));
2681             LIns *both = lir->ins2(LIR_andi, ge, le);
2682             if (!fails.append(lir->insBranch(LIR_jf, lir->insEqI_0(both), NULL)))
2683                 return NULL;
2684             break;
2685           }
2686           case REOP_ALNUM:
2687           {
2688             /*
2689              * Compile the condition:
2690              *   ((uint)*cp) < 128 && js_alnum[(uint)*cp]
2691              */
2692             LIns *rangeCnd = lir->ins2(LIR_ltui, chr, lir->insImmI(128));
2693             if (!fails.append(lir->insBranch(LIR_jf, rangeCnd, NULL)))
2694                 return NULL;
2695             LIns *tableVal = compileTableRead(chr, js_alnum);
2696             if (!fails.append(lir->insBranch(LIR_jt, lir->insEqI_0(tableVal), NULL)))
2697                 return NULL;
2698             break;
2699           }
2700           case REOP_NONALNUM:
2701           {
2702             /*
2703              * Compile the condition:
2704              *   ((uint)*cp) >= 128 || !js_alnum[(uint)*cp]
2705              */
2706             LIns *rangeCnd = lir->ins2(LIR_geui, chr, lir->insImmI(128));
2707             LIns *rangeBr = lir->insBranch(LIR_jt, rangeCnd, NULL);
2708             LIns *tableVal = compileTableRead(chr, js_alnum);
2709             if (!fails.append(lir->insBranch(LIR_jf, lir->insEqI_0(tableVal), NULL)))
2710                 return NULL;
2711             LIns *success = lir->ins0(LIR_label);
2712             rangeBr->setTarget(success);
2713             break;
2714           }
2715           case REOP_SPACE:
2716           case REOP_NONSPACE:
2717           {
2718             /*
2719              * ECMA-262 7.2, 7.3, and 15.10.2.12 define a bunch of Unicode code
2720              * points for whitespace. We optimize here for the common case of
2721              * ASCII characters using a table lookup for the lower block that
2722              * can actually contain spaces. For the rest, use a (more or less)
2723              * binary search to minimize tests.
2724              *
2725              *   [0000,0020]: 9, A, B, C, D, 20
2726              *   (0020,00A0): none
2727              *   [00A0,2000): A0, 1680, 180E
2728              *   [2000,200A]: all
2729              *   (200A, max): 2028, 2029, 202F, 205F, 3000
2730              */
2731             /* Below 0x20? */
2732             LIns *tableRangeCnd = lir->ins2(LIR_leui, chr, lir->insImmI(0x20));
2733             LIns *tableRangeBr = lir->insBranch(LIR_jt, tableRangeCnd, NULL);
2734             /* Fall through means *chr > 0x20. */
2735
2736             /* Handle (0x20,0xA0). */
2737             LIns *asciiCnd = lir->ins2(LIR_ltui, chr, lir->insImmI(0xA0));
2738             LIns *asciiMissBr = lir->insBranch(LIR_jt, asciiCnd, NULL);
2739             /* Fall through means *chr >= 0xA0. */
2740
2741             /* Partition around [0x2000,0x200A]. */
2742             LIns *belowCnd = lir->ins2(LIR_ltui, chr, lir->insImmI(0x2000));
2743             LIns *belowBr = lir->insBranch(LIR_jt, belowCnd, NULL);
2744             LIns *aboveCnd = lir->ins2(LIR_gtui, chr, lir->insImmI(0x200A));
2745             LIns *aboveBr = lir->insBranch(LIR_jt, aboveCnd, NULL);
2746             LIns *intervalMatchBr = lir->insBranch(LIR_j, NULL, NULL);
2747
2748             /* Handle [0xA0,0x2000). */
2749             LIns *belowLbl = lir->ins0(LIR_label);
2750             belowBr->setTarget(belowLbl);
2751             LIns *eq1Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0xA0));
2752             LIns *eq1Br = lir->insBranch(LIR_jt, eq1Cnd, NULL);
2753             LIns *eq2Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x1680));
2754             LIns *eq2Br = lir->insBranch(LIR_jt, eq2Cnd, NULL);
2755             LIns *eq3Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x180E));
2756             LIns *eq3Br = lir->insBranch(LIR_jt, eq3Cnd, NULL);
2757             LIns *belowMissBr = lir->insBranch(LIR_j, NULL, NULL);
2758
2759             /* Handle (0x200A, max). */
2760             LIns *aboveLbl = lir->ins0(LIR_label);
2761             aboveBr->setTarget(aboveLbl);
2762             LIns *eq4Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x2028));
2763             LIns *eq4Br = lir->insBranch(LIR_jt, eq4Cnd, NULL);
2764             LIns *eq5Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x2029));
2765             LIns *eq5Br = lir->insBranch(LIR_jt, eq5Cnd, NULL);
2766             LIns *eq6Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x202F));
2767             LIns *eq6Br = lir->insBranch(LIR_jt, eq6Cnd, NULL);
2768             LIns *eq7Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x205F));
2769             LIns *eq7Br = lir->insBranch(LIR_jt, eq7Cnd, NULL);
2770             LIns *eq8Cnd = lir->ins2(LIR_eqi, chr, lir->insImmI(0x3000));
2771             LIns *eq8Br = lir->insBranch(LIR_jt, eq8Cnd, NULL);
2772             LIns *aboveMissBr = lir->insBranch(LIR_j, NULL, NULL);
2773
2774             /* Handle [0,0x20]. */
2775             LIns *tableLbl = lir->ins0(LIR_label);
2776             tableRangeBr->setTarget(tableLbl);
2777             LIns *tableVal = compileTableRead(chr, js_ws);
2778             LIns *tableCnd = lir->insEqI_0(tableVal);
2779             LIns *tableMatchBr = lir->insBranch(LIR_jf, tableCnd, NULL);
2780
2781             /* Collect misses. */
2782             LIns *missLbl = lir->ins0(LIR_label);
2783             asciiMissBr->setTarget(missLbl);
2784             belowMissBr->setTarget(missLbl);
2785             aboveMissBr->setTarget(missLbl);
2786             LIns *missBr = lir->insBranch(LIR_j, NULL, NULL);
2787             if (node->op == REOP_SPACE) {
2788                 if (!fails.append(missBr))
2789                     return NULL;
2790             }
2791
2792             /* Collect matches. */
2793             LIns *matchLbl = lir->ins0(LIR_label);
2794             intervalMatchBr->setTarget(matchLbl);
2795             tableMatchBr->setTarget(matchLbl);
2796             eq1Br->setTarget(matchLbl); eq2Br->setTarget(matchLbl);
2797             eq3Br->setTarget(matchLbl); eq4Br->setTarget(matchLbl);
2798             eq5Br->setTarget(matchLbl); eq6Br->setTarget(matchLbl);
2799             eq7Br->setTarget(matchLbl); eq8Br->setTarget(matchLbl);
2800             if (node->op == REOP_NONSPACE) {
2801                 LIns *matchBr = lir->insBranch(LIR_j, NULL, NULL);
2802                 if (!fails.append(matchBr))
2803                     return NULL;
2804             }
2805             /* Fall through means match == success. */
2806
2807             /* Collect successes to fall through. */
2808             LIns *success = lir->ins0(LIR_label);
2809             if (node->op == REOP_NONSPACE)
2810                 missBr->setTarget(success);
2811             break;
2812           }
2813           default:
2814             return NULL;
2815         }
2816
2817         return lir->ins2(LIR_addp, pos, lir->insImmWord(2));
2818     }
2819
2820     LIns *compileAlt(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
2821     {
2822         RENode *leftRe = (RENode *)node->kid, *rightRe = (RENode *)node->u.kid2;
2823
2824         /*
2825          * If the RE continues after the alternative, we need to ensure that no
2826          * backtracking is required. Recursive calls to compileNode will fail
2827          * on capturing parens, so the only thing we have to check here is that,
2828          * if the left subexpression matches, we can keep going without later
2829          * deciding we need to try the right subexpression.
2830          */
2831         if (!atEnd) {
2832             /*
2833              * If there is no character overlap between left and right, then
2834              * there is only one possible path through the alternative.
2835              */
2836             CharSet leftSet, rightSet;
2837             if (!enumerateNextChars(cx, leftRe, leftSet) ||
2838                 !enumerateNextChars(cx, rightRe, rightSet) ||
2839                 !leftSet.disjoint(rightSet))
2840                 return NULL;
2841
2842             /*
2843              * If there is an empty path through either subexpression, the above
2844              * check is incomplete; we need to include |node->next| as well.
2845              */
2846             bool epsLeft = mayMatchEmpty(leftRe),
2847                  epsRight = mayMatchEmpty(rightRe);
2848             if (epsRight && epsLeft) {
2849                 return NULL;
2850             } else if (epsLeft || epsRight) {
2851                 CharSet nextSet;
2852                 if (!enumerateNextChars(cx, node->next, nextSet) ||
2853                     (epsLeft && !nextSet.disjoint(rightSet)) ||
2854                     (epsRight && !nextSet.disjoint(leftSet))) {
2855                     return NULL;
2856                 }
2857             }
2858         }
2859
2860         /* Try left branch. */
2861         LInsList kidFails(cx);
2862         LIns *branchEnd = compileNode(leftRe, pos, atEnd, kidFails);
2863         if (!branchEnd)
2864             return NULL;
2865
2866         /*
2867          * Since there are no phis, simulate by writing to and reading from
2868          * memory (REGlobalData::stateStack, since it is unused).
2869          */
2870         lir->insStore(branchEnd, state,
2871                        offsetof(REGlobalData, stateStack), ACC_OTHER);
2872         LIns *leftSuccess = lir->insBranch(LIR_j, NULL, NULL);
2873
2874         /* Try right branch. */
2875         targetCurrentPoint(kidFails);
2876         if (!(branchEnd = compileNode(rightRe, pos, atEnd, fails)))
2877             return NULL;
2878         lir->insStore(branchEnd, state,
2879                        offsetof(REGlobalData, stateStack), ACC_OTHER);
2880
2881         /* Land success on the left branch. */
2882         targetCurrentPoint(leftSuccess);
2883         return addName(fragment->lirbuf,
2884                        lir->insLoad(LIR_ldp, state, offsetof(REGlobalData, stateStack), ACC_OTHER),
2885                        "pos");
2886     }
2887
2888     LIns *compileOpt(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
2889     {
2890         /*
2891          * Since there are no phis, simulate by writing to and reading from
2892          * memory (REGlobalData::stateStack, since it is unused).
2893          */
2894         lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);
2895
2896         /* Try ? body. */
2897         LInsList kidFails(cx);
2898         if (!(pos = compileNode(node, pos, atEnd, kidFails)))
2899             return NULL;
2900         lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);
2901
2902         /* Join success and failure and get new position. */
2903         targetCurrentPoint(kidFails);
2904         pos = addName(fragment->lirbuf,
2905                       lir->insLoad(LIR_ldp, state, offsetof(REGlobalData, stateStack), ACC_OTHER),
2906                       "pos");
2907
2908         return pos;
2909     }
2910
2911     LIns *compileQuant(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
2912     {
2913         /* Only support greedy *, +, ?. */
2914         if (!node->u.range.greedy ||
2915             node->u.range.min > 1 ||
2916             (node->u.range.max > 1 && node->u.range.max < (uintN)-1)) {
2917             return NULL;
2918         }
2919
2920         RENode *bodyRe = (RENode *)node->kid;
2921
2922         /*
2923          * If the RE continues after the alternative, we need to ensure that no
2924          * backtracking is required. Recursive calls to compileNode will fail
2925          * on capturing parens, so the only thing we have to check here is that,
2926          * if the quantifier body matches, we can continue matching the body
2927          * without later deciding we need to undo the body matches.
2928          */
2929         if (!atEnd) {
2930             /*
2931              * If there is no character overlap between the body and
2932              * |node->next|, then all possible body matches are used.
2933              */
2934             CharSet bodySet, nextSet;
2935             if (!enumerateNextChars(cx, bodyRe, bodySet) ||
2936                 !enumerateNextChars(cx, node->next, nextSet) ||
2937                 !bodySet.disjoint(nextSet)) {
2938                 return NULL;
2939             }
2940         }
2941
2942         /* Fork off ? and {1,1}. */
2943         if (node->u.range.max == 1) {
2944             if (node->u.range.min == 1)
2945                 return compileNode(bodyRe, pos, atEnd, fails);
2946             else
2947                 return compileOpt(bodyRe, pos, atEnd, fails);
2948         }
2949
2950         /* For +, compile a copy of the body where failure is real failure. */
2951         if (node->u.range.min == 1) {
2952             if (!(pos = compileNode(bodyRe, pos, atEnd, fails)))
2953                 return NULL;
2954         }
2955
2956         /*
2957          * Since there are no phis, simulate by writing to and reading from
2958          * memory (REGlobalData::stateStack, since it is unused).
2959          */
2960         lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);
2961
2962         /* Begin iteration: load loop variables. */
2963         LIns *loopTop = lir->ins0(LIR_label);
2964         LIns *iterBegin = addName(fragment->lirbuf,
2965                                   lir->insLoad(LIR_ldp, state,
2966                                                offsetof(REGlobalData, stateStack), ACC_OTHER),
2967                                   "pos");
2968
2969         /* Match quantifier body. */
2970         LInsList kidFails(cx);
2971         LIns *iterEnd = compileNode(bodyRe, iterBegin, atEnd, kidFails);
2972         if (!iterEnd)
2973             return NULL;
2974
2975         /*
2976          * If there is an epsilon path through the body then, when it is taken,
2977          * we need to abort the loop or else we will loop forever.
2978          */
2979         if (mayMatchEmpty(bodyRe)) {
2980             LIns *eqCnd = lir->ins2(LIR_eqp, iterBegin, iterEnd);
2981             if (!kidFails.append(lir->insBranch(LIR_jt, eqCnd, NULL)))
2982                 return NULL;
2983         }
2984
2985         /* End iteration: store loop variables, increment, jump */
2986         lir->insStore(iterEnd, state, offsetof(REGlobalData, stateStack), ACC_OTHER);
2987         lir->insBranch(LIR_j, NULL, loopTop);
2988
2989         /*
2990          * Using '+' as branch, the intended control flow is:
2991          *
2992          *     ...
2993          * A -> |
2994          *      |<---.
2995          * B -> |    |
2996          *      +--. |
2997          * C -> |  | |
2998          *      +--. |
2999          * D -> |  | |
3000          *      +--|-'
3001          * X -> |  |
3002          *      |<-'
3003          * E -> |
3004          *     ...
3005          *
3006          * We are currently at point X. Since the regalloc makes a single,
3007          * linear, backwards sweep over the IR (going from E to A), point X
3008          * must tell the regalloc what LIR insns are live at the end of D.
3009          * Thus, we need to report *all* insns defined *before* the end of D
3010          * that may be used *after* D. This means insns defined in A, B, C, or
3011          * D and used in B, C, D, or E. Since insns in B, C, and D are
3012          * conditionally executed, and we (currently) don't have real phi
3013          * nodes, we need only consider insns defined in A and used in E.
3014          */
3015         lir->ins1(LIR_livep, state);
3016         lir->ins1(LIR_livep, cpend);
3017         lir->ins1(LIR_livep, start);
3018
3019         /* After the loop: reload 'pos' from memory and continue. */
3020         targetCurrentPoint(kidFails);
3021         return iterBegin;
3022     }
3023
3024     /*
3025      * Compile the regular expression rooted at 'node'. Return 0 on failed
3026      * compilation. Otherwise, generate code that falls through on success (the
3027      * returned LIns* is the current 'pos') and jumps to the end on failure (by
3028      * adding the guard LIns to 'fails').
3029      */
3030     LIns *compileNode(RENode *node, LIns *pos, bool atEnd, LInsList &fails)
3031     {
3032         for (; pos && node; node = node->next) {
3033             if (outOfMemory())
3034                 return NULL;
3035
3036             bool childNextIsEnd = atEnd && !node->next;
3037
3038             switch (node->op) {
3039               case REOP_EMPTY:
3040                 pos = compileEmpty(node, pos, fails);
3041                 break;
3042               case REOP_FLAT:
3043                 pos = compileFlat(node, pos, fails);
3044                 break;
3045               case REOP_ALT:
3046               case REOP_ALTPREREQ:
3047                 pos = compileAlt(node, pos, childNextIsEnd, fails);
3048                 break;
3049               case REOP_QUANT:
3050                 pos = compileQuant(node, pos, childNextIsEnd, fails);
3051                 break;
3052               case REOP_CLASS:
3053                 pos = compileClass(node, pos, fails);
3054                 break;
3055               case REOP_DOT:
3056               case REOP_DIGIT:
3057               case REOP_NONDIGIT:
3058               case REOP_ALNUM:
3059               case REOP_NONALNUM:
3060               case REOP_SPACE:
3061               case REOP_NONSPACE:
3062                 pos = compileBuiltinClass(node, pos, fails);
3063                 break;
3064               default:
3065                 return NULL;
3066             }
3067         }
3068         return pos;
3069     }
3070
3071     /*
3072      * This function kicks off recursive compileNode compilation, finishes the
3073      * success path, and lets the failed-match path fall through.
3074      */
3075     bool compileRootNode(RENode *root, LIns *pos, LIns *anchorFail)
3076     {
3077         /* Compile the regular expression body. */
3078         LInsList fails(cx);
3079         pos = compileNode(root, pos, true, fails);
3080         if (!pos)
3081             return false;
3082
3083         /* Fall-through from compileNode means success. */
3084         lir->insStore(pos, state, offsetof(REGlobalData, stateStack), ACC_OTHER);
3085         lir->ins0(LIR_regfence);
3086         lir->ins1(LIR_reti, lir->insImmI(1));
3087
3088         /* Stick return here so we don't have to jump over it every time. */
3089         if (anchorFail) {
3090             targetCurrentPoint(anchorFail);
3091             lir->ins0(LIR_regfence);
3092             lir->ins1(LIR_reti, lir->insImmI(0));
3093         }
3094
3095         /* Target failed matches. */
3096         targetCurrentPoint(fails);
3097         return true;
3098     }
3099
3100     /* Compile a regular expressions that can only match on the first char. */
3101     bool compileSticky(RENode *root, LIns *start)
3102     {
3103         if (!compileRootNode(root, start, NULL))
3104             return false;
3105
3106         /* Failed to match on first character, so fail whole match. */
3107         lir->ins0(LIR_regfence);
3108         lir->ins1(LIR_reti, lir->insImmI(0));
3109         return !outOfMemory();
3110     }
3111
3112     /* Compile normal regular expressions that can match starting at any char. */
3113     bool compileAnchoring(RENode *root, LIns *start)
3114     {
3115         /* Guard outer anchoring loop. Use <= to allow empty regexp match. */
3116         LIns *anchorFail = lir->insBranch(LIR_jf, lir->ins2(LIR_lep, start, cpend), 0);
3117
3118         if (!compileRootNode(root, start, anchorFail))
3119             return false;
3120
3121         /* Outer loop increment. */
3122         lir->insStore(lir->ins2(LIR_addp, start, lir->insImmWord(2)), state,
3123                        offsetof(REGlobalData, skipped), ACC_OTHER);
3124
3125         return !outOfMemory();
3126     }
3127
3128     inline LIns*
3129     addName(LirBuffer* lirbuf, LIns* ins, const char* name)
3130     {
3131 #ifdef NJ_VERBOSE
3132         debug_only_stmt(lirbuf->printer->lirNameMap->addName(ins, name);)
3133 #endif
3134         return ins;
3135     }
3136
3137     /*
3138      * Insert the side exit and guard record for a compiled regexp. Most
3139      * of the fields are not used. The important part is the regexp source
3140      * and flags, which we use as the fragment lookup key.
3141      */
3142     GuardRecord* insertGuard(LIns* loopLabel, const jschar* re_chars, size_t re_length)
3143     {
3144         if (loopLabel) {
3145             lir->insBranch(LIR_j, NULL, loopLabel);
3146             LirBuffer* lirbuf = fragment->lirbuf;
3147             lir->ins1(LIR_livep, lirbuf->state);
3148             lir->ins1(LIR_livep, lirbuf->param1);
3149         }
3150
3151         Allocator &alloc = *JS_TRACE_MONITOR(cx).dataAlloc;
3152
3153         /* Must only create a VMSideExit; see StackFilter::getTops. */
3154         size_t len = (sizeof(GuardRecord) +
3155                       sizeof(VMSideExit) +
3156                       (re_length-1) * sizeof(jschar));
3157         GuardRecord* guard = (GuardRecord *) alloc.alloc(len);
3158         VMSideExit* exit = (VMSideExit*)(guard+1);
3159         guard->exit = exit;
3160         guard->exit->target = fragment;
3161         fragment->lastIns = lir->insGuard(LIR_x, NULL, guard);
3162         // guard->profCount is calloc'd to zero
3163         verbose_only(
3164             guard->profGuardID = fragment->guardNumberer++;
3165             guard->nextInFrag = fragment->guardsForFrag;
3166             fragment->guardsForFrag = guard;
3167         )
3168         return guard;
3169     }
3170
3171  public:
3172     RegExpNativeCompiler(JSContext* cx, JSRegExp* re, CompilerState* cs, Fragment* fragment)
3173         : tempAlloc(*JS_TRACE_MONITOR(cx).reTempAlloc), cx(cx),
3174           re(re), cs(cs), fragment(fragment), lir(NULL), lirBufWriter(NULL),
3175           lirbuf(new (tempAlloc) LirBuffer(tempAlloc))
3176     {
3177         fragment->lirbuf = lirbuf;
3178 #ifdef DEBUG
3179         lirbuf->printer = new (tempAlloc) LInsPrinter(tempAlloc);
3180 #endif
3181     }
3182
3183     ~RegExpNativeCompiler() {
3184         /* Purge the tempAlloc used during recording. */
3185         tempAlloc.reset();
3186     }
3187
3188     JSBool compile()
3189     {
3190         GuardRecord* guard = NULL;
3191         const jschar* re_chars;
3192         size_t re_length;
3193         TraceMonitor* tm = &JS_TRACE_MONITOR(cx);
3194         Assembler *assm = tm->assembler;
3195         LIns* loopLabel = NULL;
3196
3197         if (outOfMemory() || OverfullJITCache(tm))
3198             return JS_FALSE;
3199
3200         re->source->getCharsAndLength(re_chars, re_length);
3201         /*
3202          * If the regexp is too long nanojit will assert when we
3203          * try to insert the guard record.
3204          */
3205         if (re_length > 1024) {
3206             re->flags |= JSREG_NOCOMPILE;
3207             return JS_FALSE;
3208         }
3209
3210         /* At this point we have an empty fragment. */
3211         LirBuffer* lirbuf = fragment->lirbuf;
3212         if (outOfMemory())
3213             goto fail;
3214         /* FIXME Use bug 463260 smart pointer when available. */
3215         lir = lirBufWriter = new LirBufWriter(lirbuf, nanojit::AvmCore::config);
3216
3217         /* FIXME Use bug 463260 smart pointer when available. */
3218 #ifdef NJ_VERBOSE
3219         debug_only_stmt(
3220             if (LogController.lcbits & LC_TMRegexp) {
3221                 lir = verbose_filter = new VerboseWriter(tempAlloc, lir, lirbuf->printer,
3222                                                          &LogController);
3223             }
3224         )
3225 #endif
3226 #ifdef DEBUG
3227         lir = validate_writer = new ValidateWriter(lir, lirbuf->printer, "regexp writer pipeline");
3228 #endif
3229
3230         /*
3231          * Although we could just load REGlobalData::cpend from 'state', by
3232          * passing it as a parameter, we avoid loading it every iteration.
3233          */
3234         lir->ins0(LIR_start);
3235
3236         for (int i = 0; i < NumSavedRegs; ++i)
3237             lir->insParam(i, 1);
3238 #ifdef DEBUG
3239         for (int i = 0; i < NumSavedRegs; ++i)
3240             addName(lirbuf, lirbuf->savedRegs[i], regNames[Assembler::savedRegs[i]]);
3241 #endif
3242
3243         lirbuf->state = state = addName(lirbuf, lir->insParam(0, 0), "state");
3244         lirbuf->param1 = cpend = addName(lirbuf, lir->insParam(1, 0), "cpend");
3245
3246         loopLabel = lir->ins0(LIR_label);
3247         // If profiling, record where the loop label is, so that the
3248         // assembler can insert a frag-entry-counter increment at that
3249         // point
3250         verbose_only( if (LogController.lcbits & LC_FragProfile) {
3251             NanoAssert(!fragment->loopLabel);
3252             fragment->loopLabel = loopLabel;
3253         })
3254
3255         start = addName(lirbuf,
3256                       lir->insLoad(LIR_ldp, state, offsetof(REGlobalData, skipped), ACC_OTHER),
3257                       "start");
3258
3259         if (cs->flags & JSREG_STICKY) {
3260             if (!compileSticky(cs->result, start))
3261                 goto fail;
3262         } else {
3263             if (!compileAnchoring(cs->result, start))
3264                 goto fail;
3265         }
3266
3267         guard = insertGuard(loopLabel, re_chars, re_length);
3268
3269         if (outOfMemory())
3270             goto fail;
3271
3272         /*
3273          * Deep in the nanojit compiler, the StackFilter is trying to throw
3274          * away stores above the VM interpreter/native stacks. We have no such
3275          * stacks, so rely on the fact that lirbuf->sp and lirbuf->rp are null
3276          * to ensure our stores are ignored.
3277          */
3278         JS_ASSERT(!lirbuf->sp && !lirbuf->rp);
3279
3280         assm->compile(fragment, tempAlloc, /*optimize*/true verbose_only(, lirbuf->printer));
3281         if (assm->error() != nanojit::None)
3282             goto fail;
3283
3284         delete lirBufWriter;
3285 #ifdef DEBUG
3286         delete validate_writer;
3287 #endif
3288 #ifdef NJ_VERBOSE
3289         debug_only_stmt( if (LogController.lcbits & LC_TMRegexp)
3290                              delete verbose_filter; )
3291 #endif
3292         return JS_TRUE;
3293     fail:
3294         if (outOfMemory() || OverfullJITCache(tm)) {
3295             delete lirBufWriter;
3296             // recover profiling data from expiring Fragments
3297             verbose_only(
3298                 REHashMap::Iter iter(*(tm->reFragments));
3299                 while (iter.next()) {
3300                     nanojit::Fragment* frag = iter.value();
3301                     FragProfiling_FragFinalizer(frag, tm);
3302                 }
3303             )
3304             FlushJITCache(cx);
3305         } else {
3306             if (!guard) insertGuard(loopLabel, re_chars, re_length);
3307             re->flags |= JSREG_NOCOMPILE;
3308             delete lirBufWriter;
3309         }
3310 #ifdef DEBUG
3311         delete validate_writer;
3312 #endif
3313 #ifdef NJ_VERBOSE
3314         debug_only_stmt( if (LogController.lcbits & LC_TMRegexp)
3315                              delete verbose_filter; )
3316 #endif
3317         return JS_FALSE;
3318     }
3319 };
3320
3321 /*
3322  * Compile a regexp to native code in the given fragment.
3323  */
3324 static inline JSBool
3325 CompileRegExpToNative(JSContext* cx, JSRegExp* re, Fragment* fragment)
3326 {
3327     JSBool rv = JS_FALSE;
3328     void* mark;
3329     CompilerState state;
3330     RegExpNativeCompiler rc(cx, re, &state, fragment);
3331
3332     JS_ASSERT(!fragment->code());
3333     mark = cx->tempPool.getMark();
3334     if (!CompileRegExpToAST(cx, NULL, re->source, re->flags, state)) {
3335         goto out;
3336     }
3337     rv = rc.compile();
3338  out:
3339     cx->tempPool.release(mark);
3340     return rv;
3341 }
3342
3343 /* Function type for a compiled native regexp. */
3344 typedef void *(FASTCALL *NativeRegExp)(REGlobalData*, const jschar *);
3345
3346 /*
3347  * Return a compiled native regexp if one already exists or can be created
3348  * now, or NULL otherwise.
3349  */
3350 static NativeRegExp
3351 GetNativeRegExp(JSContext* cx, JSRegExp* re)
3352 {
3353     const jschar *re_chars;
3354     size_t re_length;
3355     re->source->getCharsAndLength(re_chars, re_length);
3356     Fragment *fragment = LookupNativeRegExp(cx, re->flags, re_chars, re_length);
3357     JS_ASSERT(fragment);
3358     if (!fragment->code() && fragment->recordAttempts == 0) {
3359         fragment->recordAttempts++;
3360         if (!CompileRegExpToNative(cx, re, fragment))
3361             return NULL;
3362     }
3363     union { NIns *code; NativeRegExp func; } u;
3364     u.code = fragment->code();
3365     return u.func;
3366 }
3367 #endif
3368
3369 JSRegExp *
3370 js_NewRegExp(JSContext *cx, TokenStream *ts,
3371              JSString *str, uintN flags, JSBool flat)
3372 {
3373     JSRegExp *re;
3374     void *mark;
3375     CompilerState state;
3376     size_t resize;
3377     jsbytecode *endPC;
3378     uintN i;
3379
3380     re = NULL;
3381     mark = cx->tempPool.getMark();
3382
3383     /*
3384      * Parsing the string as flat is now expressed internally using
3385      * a flag, so that we keep this information in the JSRegExp, but
3386      * we keep the 'flat' parameter for now for compatibility.
3387      */
3388     if (flat) flags |= JSREG_FLAT;
3389     if (!CompileRegExpToAST(cx, ts, str, flags, state))
3390         goto out;
3391
3392     resize = offsetof(JSRegExp, program) + state.progLength + 1;
3393     re = (JSRegExp *) cx->malloc(resize);
3394     if (!re)
3395         goto out;
3396
3397     re->nrefs = 1;
3398     JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3399     re->classCount = state.classCount;
3400     if (re->classCount) {
3401         re->classList = (RECharSet *)
3402             cx->malloc(re->classCount * sizeof(RECharSet));
3403         if (!re->classList) {
3404             js_DestroyRegExp(cx, re);
3405             re = NULL;
3406             goto out;
3407         }
3408         for (i = 0; i < re->classCount; i++)
3409             re->classList[i].converted = JS_FALSE;
3410     } else {
3411         re->classList = NULL;
3412     }
3413
3414     /* Compile the bytecode version. */
3415     endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3416     if (!endPC) {
3417         js_DestroyRegExp(cx, re);
3418         re = NULL;
3419         goto out;
3420     }
3421     *endPC++ = REOP_END;
3422     /*
3423      * Check whether size was overestimated and shrink using realloc.
3424      * This is safe since no pointers to newly parsed regexp or its parts
3425      * besides re exist here.
3426      */
3427     if ((size_t)(endPC - re->program) != state.progLength + 1) {
3428         JSRegExp *tmp;
3429         JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
3430         resize = offsetof(JSRegExp, program) + (endPC - re->program);
3431         tmp = (JSRegExp *) cx->realloc(re, resize);
3432         if (tmp)
3433             re = tmp;
3434     }
3435
3436     re->flags = uint16(flags);
3437     re->parenCount = state.parenCount;
3438     re->source = str;
3439
3440 out:
3441     cx->tempPool.release(mark);
3442     return re;
3443 }
3444
3445 JSRegExp *
3446 js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt, JSBool flat)
3447 {
3448     uintN flags;
3449     const jschar *s;
3450     size_t i, n;
3451     char charBuf[2];
3452
3453     flags = 0;
3454     if (opt) {
3455         opt->getCharsAndLength(s, n);
3456         for (i = 0; i < n; i++) {
3457 #define HANDLE_FLAG(name)                                                     \
3458             JS_BEGIN_MACRO                                                    \
3459                 if (flags & (name))                                           \
3460                     goto bad_flag;                                            \
3461                 flags |= (name);                                              \
3462             JS_END_MACRO
3463             switch (s[i]) {
3464               case 'g':
3465                 HANDLE_FLAG(JSREG_GLOB);
3466                 break;
3467               case 'i':
3468                 HANDLE_FLAG(JSREG_FOLD);
3469                 break;
3470               case 'm':
3471                 HANDLE_FLAG(JSREG_MULTILINE);
3472                 break;
3473               case 'y':
3474                 HANDLE_FLAG(JSREG_STICKY);
3475                 break;
3476               default:
3477               bad_flag:
3478                 charBuf[0] = (char)s[i];
3479                 charBuf[1] = '\0';
3480                 JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
3481                                              js_GetErrorMessage, NULL,
3482                                              JSMSG_BAD_REGEXP_FLAG, charBuf);
3483                 return NULL;
3484             }
3485 #undef HANDLE_FLAG
3486         }
3487     }
3488     return js_NewRegExp(cx, NULL, str, flags, flat);
3489 }
3490
3491 /*
3492  * Save the current state of the match - the position in the input
3493  * text as well as the position in the bytecode. The state of any
3494  * parent expressions is also saved (preceding state).
3495  * Contents of parenCount parentheses from parenIndex are also saved.
3496  */
3497 static REBackTrackData *
3498 PushBackTrackState(REGlobalData *gData, REOp op,
3499                    jsbytecode *target, REMatchState *x, const jschar *cp,
3500                    size_t parenIndex, size_t parenCount)
3501 {
3502     size_t i;
3503     REBackTrackData *result =
3504         (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
3505
3506     size_t sz = sizeof(REBackTrackData) +
3507                 gData->stateStackTop * sizeof(REProgState) +
3508                 parenCount * sizeof(RECapture);
3509
3510     ptrdiff_t btsize = gData->backTrackStackSize;
3511     ptrdiff_t btincr = ((char *)result + sz) -
3512                        ((char *)gData->backTrackStack + btsize);
3513
3514     re_debug("\tBT_Push: %lu,%lu",
3515              (unsigned long) parenIndex, (unsigned long) parenCount);
3516
3517     if (btincr > 0) {
3518         ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
3519
3520         btincr = JS_ROUNDUP(btincr, btsize);
3521         gData->cx->regexpPool.growCast<REBackTrackData *>(gData->backTrackStack, btsize, btincr);
3522         if (!gData->backTrackStack) {
3523             js_ReportOutOfScriptQuota(gData->cx);
3524             gData->ok = JS_FALSE;
3525             return NULL;
3526         }
3527         gData->backTrackStackSize = btsize + btincr;
3528         result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
3529     }
3530     gData->backTrackSP = result;
3531     result->sz = gData->cursz;
3532     gData->cursz = sz;
3533
3534     result->backtrack_op = op;
3535     result->backtrack_pc = target;
3536     result->cp = cp;
3537     result->parenCount = parenCount;
3538     result->parenIndex = parenIndex;
3539
3540     result->saveStateStackTop = gData->stateStackTop;
3541     JS_ASSERT(gData->stateStackTop);
3542     memcpy(result + 1, gData->stateStack,
3543            sizeof(REProgState) * result->saveStateStackTop);
3544
3545     if (parenCount != 0) {
3546         memcpy((char *)(result + 1) +
3547                sizeof(REProgState) * result->saveStateStackTop,
3548                &x->parens[parenIndex],
3549                sizeof(RECapture) * parenCount);
3550         for (i = 0; i != parenCount; i++)
3551             x->parens[parenIndex + i].index = -1;
3552     }
3553
3554     return result;
3555 }
3556
3557
3558 /*
3559  *   Consecutive literal characters.
3560  */
3561 #if 0
3562 static REMatchState *
3563 FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
3564              size_t length)
3565 {
3566     size_t i;
3567     if (length > gData->cpend - x->cp)
3568         return NULL;
3569     for (i = 0; i != length; i++) {
3570         if (matchChars[i] != x->cp[i])
3571             return NULL;
3572     }
3573     x->cp += length;
3574     return x;
3575 }
3576 #endif
3577
3578 static JS_ALWAYS_INLINE REMatchState *
3579 FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
3580               size_t length)
3581 {
3582     size_t i;
3583     JS_ASSERT(gData->cpend >= x->cp);
3584     if (length > (size_t)(gData->cpend - x->cp))
3585         return NULL;
3586     for (i = 0; i != length; i++) {
3587         if (upcase(matchChars[i]) != upcase(x->cp[i]))
3588             return NULL;
3589     }
3590     x->cp += length;
3591     return x;
3592 }
3593
3594 /*
3595  * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
3596  * 2. If E is not a character then go to step 6.
3597  * 3. Let ch be E's character.
3598  * 4. Let A be a one-element RECharSet containing the character ch.
3599  * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
3600  * 6. E must be an integer. Let n be that integer.
3601  * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
3602  * 8. Return an internal Matcher closure that takes two arguments, a State x
3603  *    and a Continuation c, and performs the following:
3604  *     1. Let cap be x's captures internal array.
3605  *     2. Let s be cap[n].
3606  *     3. If s is undefined, then call c(x) and return its result.
3607  *     4. Let e be x's endIndex.
3608  *     5. Let len be s's length.
3609  *     6. Let f be e+len.
3610  *     7. If f>InputLength, return failure.
3611  *     8. If there exists an integer i between 0 (inclusive) and len (exclusive)
3612  *        such that Canonicalize(s[i]) is not the same character as
3613  *        Canonicalize(Input [e+i]), then return failure.
3614  *     9. Let y be the State (f, cap).
3615  *     10. Call c(y) and return its result.
3616  */
3617 static REMatchState *
3618 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
3619 {
3620     size_t len, i;
3621     const jschar *parenContent;
3622     RECapture *cap = &x->parens[parenIndex];
3623
3624     if (cap->index == -1)
3625         return x;
3626
3627     len = cap->length;
3628     if (x->cp + len > gData->cpend)
3629         return NULL;
3630
3631     parenContent = &gData->cpbegin[cap->index];
3632     if (gData->regexp->flags & JSREG_FOLD) {
3633         for (i = 0; i < len; i++) {
3634             if (upcase(parenContent[i]) != upcase(x->cp[i]))
3635                 return NULL;
3636         }
3637     } else {
3638         for (i = 0; i < len; i++) {
3639             if (parenContent[i] != x->cp[i])
3640                 return NULL;
3641         }
3642     }
3643     x->cp += len;
3644     return x;
3645 }
3646
3647
3648 /* Add a single character to the RECharSet */
3649 static void
3650 AddCharacterToCharSet(RECharSet *cs, jschar c)
3651 {
3652     uintN byteIndex = (uintN)(c >> 3);
3653     JS_ASSERT(c <= cs->length);
3654     cs->u.bits[byteIndex] |= 1 << (c & 0x7);
3655 }
3656
3657
3658 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
3659 static void
3660 AddCharacterRangeToCharSet(RECharSet *cs, uintN c1, uintN c2)
3661 {
3662     uintN i;
3663
3664     uintN byteIndex1 = c1 >> 3;
3665     uintN byteIndex2 = c2 >> 3;
3666
3667     JS_ASSERT(c2 <= cs->length && c1 <= c2);
3668
3669     c1 &= 0x7;
3670     c2 &= 0x7;
3671
3672     if (byteIndex1 == byteIndex2) {
3673         cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
3674     } else {
3675         cs->u.bits[byteIndex1] |= 0xFF << c1;
3676         for (i = byteIndex1 + 1; i < byteIndex2; i++)
3677             cs->u.bits[i] = 0xFF;
3678         cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
3679     }
3680 }
3681
3682 struct CharacterRange {
3683     jschar start;
3684     jschar end;
3685 };
3686
3687 /*
3688  * The following characters are taken from the ECMA-262 standard, section 7.2
3689  * and 7.3, and the Unicode 3 standard, Table 6-1.
3690  */
3691 static const CharacterRange WhiteSpaceRanges[] = {
3692     /* TAB, LF, VT, FF, CR */
3693     { 0x0009, 0x000D },
3694     /* SPACE */
3695     { 0x0020, 0x0020 },
3696     /* NO-BREAK SPACE */
3697     { 0x00A0, 0x00A0 },
3698     /*
3699      * EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM
3700      * SPACE, SIX-PER-EM SPACE, FIGURE SPACE, PUNCTUATION SPACE, THIN SPACE,
3701      * HAIR SPACE, ZERO WIDTH SPACE
3702      */
3703     { 0x2000, 0x200B },
3704     /* LS, PS */
3705     { 0x2028, 0x2029 },
3706     /* NARROW NO-BREAK SPACE */
3707     { 0x202F, 0x202F },
3708     /* IDEOGRAPHIC SPACE */
3709     { 0x3000, 0x3000 }
3710 };
3711
3712 /* ECMA-262 standard, section 15.10.2.6. */
3713 static const CharacterRange WordRanges[] = {
3714     { jschar('0'), jschar('9') },
3715     { jschar('A'), jschar('Z') },
3716     { jschar('_'), jschar('_') },
3717     { jschar('a'), jschar('z') }
3718 };
3719
3720 static void
3721 AddCharacterRanges(RECharSet *charSet,
3722                    const CharacterRange *range,
3723                    const CharacterRange *end)
3724 {
3725     for (; range < end; ++range)
3726         AddCharacterRangeToCharSet(charSet, range->start, range->end);
3727 }
3728
3729 static void
3730 AddInvertedCharacterRanges(RECharSet *charSet,
3731                            const CharacterRange *range,
3732                            const CharacterRange *end)
3733 {
3734     uint16 previous = 0;
3735     for (; range < end; ++range) {
3736         AddCharacterRangeToCharSet(charSet, previous, range->start - 1);
3737         previous = range->end + 1;
3738     }
3739     AddCharacterRangeToCharSet(charSet, previous, charSet->length);
3740 }
3741
3742 /* Compile the source of the class into a RECharSet */
3743 static JSBool
3744 ProcessCharSet(JSContext *cx, JSRegExp *re, RECharSet *charSet)
3745 {
3746     const jschar *src, *end;
3747     JSBool inRange = JS_FALSE;
3748     jschar rangeStart = 0;
3749     uintN byteLength, n;
3750     jschar c, thisCh;
3751     intN nDigits, i;
3752
3753     JS_ASSERT(!charSet->converted);
3754     /*
3755      * Assert that startIndex and length points to chars inside [] inside
3756      * source string.
3757      */
3758     JS_ASSERT(1 <= charSet->u.src.startIndex);
3759     JS_ASSERT(charSet->u.src.startIndex < re->source->length());
3760     JS_ASSERT(charSet->u.src.length <= re->source->length()
3761                                        - 1 - charSet->u.src.startIndex);
3762
3763     charSet->converted = JS_TRUE;
3764     src = re->source->chars() + charSet->u.src.startIndex;
3765     end = src + charSet->u.src.length;
3766     JS_ASSERT(src[-1] == '[');
3767     JS_ASSERT(end[0] == ']');
3768
3769     byteLength = (charSet->length >> 3) + 1;
3770     charSet->u.bits = (uint8 *)cx->malloc(byteLength);
3771     if (!charSet->u.bits) {
3772         JS_ReportOutOfMemory(cx);
3773         return JS_FALSE;
3774     }
3775     memset(charSet->u.bits, 0, byteLength);
3776
3777     if (src == end)
3778         return JS_TRUE;
3779
3780     if (*src == '^') {
3781         JS_ASSERT(charSet->sense == JS_FALSE);
3782         ++src;
3783     } else {
3784         JS_ASSERT(charSet->sense == JS_TRUE);
3785     }
3786
3787     while (src != end) {
3788         switch (*src) {
3789           case '\\':
3790             ++src;
3791             c = *src++;
3792             switch (c) {
3793               case 'b':
3794                 thisCh = 0x8;
3795                 break;
3796               case 'f':
3797                 thisCh = 0xC;
3798                 break;
3799               case 'n':
3800                 thisCh = 0xA;
3801                 break;
3802               case 'r':
3803                 thisCh = 0xD;
3804                 break;
3805               case 't':
3806                 thisCh = 0x9;
3807                 break;
3808               case 'v':
3809                 thisCh = 0xB;
3810                 break;
3811               case 'c':
3812                 if (src < end && JS_ISWORD(*src)) {
3813                     thisCh = (jschar)(*src++ & 0x1F);
3814                 } else {
3815                     --src;
3816                     thisCh = '\\';
3817                 }
3818                 break;
3819               case 'x':
3820                 nDigits = 2;
3821                 goto lexHex;
3822               case 'u':
3823                 nDigits = 4;
3824             lexHex:
3825                 n = 0;
3826                 for (i = 0; (i < nDigits) && (src < end); i++) {
3827                     uintN digit;
3828                     c = *src++;
3829                     if (!isASCIIHexDigit(c, &digit)) {
3830                         /*
3831                          * Back off to accepting the original '\'
3832                          * as a literal
3833                          */
3834                         src -= i + 1;
3835                         n = '\\';
3836                         break;
3837                     }
3838                     n = (n << 4) | digit;
3839                 }
3840                 thisCh = (jschar)n;
3841                 break;
3842               case '0':
3843               case '1':
3844               case '2':
3845               case '3':
3846               case '4':
3847               case '5':
3848               case '6':
3849               case '7':
3850                 /*
3851                  *  This is a non-ECMA extension - decimal escapes (in this
3852                  *  case, octal!) are supposed to be an error inside class
3853                  *  ranges, but supported here for backwards compatibility.
3854                  */
3855                 n = JS7_UNDEC(c);
3856                 c = *src;
3857                 if ('0' <= c && c <= '7') {
3858                     src++;
3859                     n = 8 * n + JS7_UNDEC(c);
3860                     c = *src;
3861                     if ('0' <= c && c <= '7') {
3862                         src++;
3863                         i = 8 * n + JS7_UNDEC(c);
3864                         if (i <= 0377)
3865                             n = i;
3866                         else
3867                             src--;
3868                     }
3869                 }
3870                 thisCh = (jschar)n;
3871                 break;
3872
3873               case 'd':
3874                 AddCharacterRangeToCharSet(charSet, '0', '9');
3875                 continue;   /* don't need range processing */
3876               case 'D':
3877                 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
3878                 AddCharacterRangeToCharSet(charSet,
3879                                            (jschar)('9' + 1),
3880                                            (jschar)charSet->length);
3881                 continue;
3882               case 's':
3883                 AddCharacterRanges(charSet, WhiteSpaceRanges,
3884                                    WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
3885                 continue;
3886               case 'S':
3887                 AddInvertedCharacterRanges(charSet, WhiteSpaceRanges,
3888                                            WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
3889                 continue;
3890               case 'w':
3891                 AddCharacterRanges(charSet, WordRanges,
3892                                    WordRanges + JS_ARRAY_LENGTH(WordRanges));
3893                 continue;
3894               case 'W':
3895                 AddInvertedCharacterRanges(charSet, WordRanges,
3896                                            WordRanges + JS_ARRAY_LENGTH(WordRanges));
3897                 continue;
3898               default:
3899                 thisCh = c;
3900                 break;
3901
3902             }
3903             break;
3904
3905           default:
3906             thisCh = *src++;
3907             break;
3908
3909         }
3910         if (inRange) {
3911             if (re->flags & JSREG_FOLD) {
3912                 int i;
3913
3914                 JS_ASSERT(rangeStart <= thisCh);
3915                 for (i = rangeStart; i <= thisCh; i++) {
3916                     jschar uch, dch;
3917
3918                     AddCharacterToCharSet(charSet, jschar(i));
3919                     uch = jschar(upcase(i));
3920                     dch = inverse_upcase(jschar(i));
3921                     if (i != uch)
3922                         AddCharacterToCharSet(charSet, uch);
3923                     if (i != dch)
3924                         AddCharacterToCharSet(charSet, dch);
3925                 }
3926             } else {
3927                 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
3928             }
3929             inRange = JS_FALSE;
3930         } else {
3931             if (re->flags & JSREG_FOLD) {
3932                 AddCharacterToCharSet(charSet, jschar(upcase(thisCh)));
3933                 AddCharacterToCharSet(charSet, inverse_upcase(thisCh));
3934             } else {
3935                 AddCharacterToCharSet(charSet, thisCh);
3936             }
3937             if (src < end - 1) {
3938                 if (*src == '-') {
3939                     ++src;
3940                     inRange = JS_TRUE;
3941                     rangeStart = thisCh;
3942                 }
3943             }
3944         }
3945     }
3946     return JS_TRUE;
3947 }
3948
3949 static inline JSBool
3950 MatcherProcessCharSet(REGlobalData *gData, RECharSet *charSet) {
3951     JSBool rv = ProcessCharSet(gData->cx, gData->regexp, charSet);
3952     if (!rv) gData->ok = JS_FALSE;
3953     return rv;
3954 }
3955
3956 void
3957 js_DestroyRegExp(JSContext *cx, JSRegExp *re)
3958 {
3959     if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
3960         if (re->classList) {
3961             uintN i;
3962             for (i = 0; i < re->classCount; i++) {
3963                 if (re->classList[i].converted)
3964                     cx->free(re->classList[i].u.bits);
3965                 re->classList[i].u.bits = NULL;
3966             }
3967             cx->free(re->classList);
3968         }
3969         cx->free(re);
3970     }
3971 }
3972
3973 static JSBool
3974 ReallocStateStack(REGlobalData *gData)
3975 {
3976     size_t limit = gData->stateStackLimit;
3977     size_t sz = sizeof(REProgState) * limit;
3978
3979     gData->cx->regexpPool.growCast<REProgState *>(gData->stateStack, sz, sz);
3980     if (!gData->stateStack) {
3981         js_ReportOutOfScriptQuota(gData->cx);
3982         gData->ok = JS_FALSE;
3983         return JS_FALSE;
3984     }
3985     gData->stateStackLimit = limit + limit;
3986     return JS_TRUE;
3987 }
3988
3989 #define PUSH_STATE_STACK(data)                                                \
3990     JS_BEGIN_MACRO                                                            \
3991         ++(data)->stateStackTop;                                              \
3992         if ((data)->stateStackTop == (data)->stateStackLimit &&               \
3993             !ReallocStateStack((data))) {                                     \
3994             return NULL;                                                      \
3995         }                                                                     \
3996     JS_END_MACRO
3997
3998 /*
3999  * Apply the current op against the given input to see if it's going to match
4000  * or fail. Return false if we don't get a match, true if we do. If updatecp is
4001  * true, then update the current state's cp. Always update startpc to the next
4002  * op.
4003  */
4004 static JS_ALWAYS_INLINE REMatchState *
4005 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
4006             jsbytecode **startpc, JSBool updatecp)
4007 {
4008     REMatchState *result = NULL;
4009     jschar matchCh;
4010     size_t parenIndex;
4011     size_t offset, length, index;
4012     jsbytecode *pc = *startpc;  /* pc has already been incremented past op */
4013     jschar *source;
4014     const jschar *startcp = x->cp;
4015     jschar ch;
4016     RECharSet *charSet;
4017
4018 #ifdef REGEXP_DEBUG
4019     const char *opname = reop_names[op];
4020     re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
4021              gData->stateStackTop * 2, "", opname);
4022 #endif
4023     switch (op) {
4024       case REOP_EMPTY:
4025         result = x;
4026         break;
4027       case REOP_BOL:
4028         if (x->cp != gData->cpbegin) {
4029             if (!gData->cx->regExpStatics.multiline &&
4030                 !(gData->regexp->flags & JSREG_MULTILINE)) {
4031                 break;
4032             }
4033             if (!RE_IS_LINE_TERM(x->cp[-1]))
4034                 break;
4035         }
4036         result = x;
4037         break;
4038       case REOP_EOL:
4039         if (x->cp != gData->cpend) {
4040             if (!gData->cx->regExpStatics.multiline &&
4041                 !(gData->regexp->flags & JSREG_MULTILINE)) {
4042                 break;
4043             }
4044             if (!RE_IS_LINE_TERM(*x->cp))
4045                 break;
4046         }
4047         result = x;
4048         break;
4049       case REOP_WBDRY:
4050         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
4051             !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
4052             result = x;
4053         }
4054         break;
4055       case REOP_WNONBDRY:
4056         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
4057             (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
4058             result = x;
4059         }
4060         break;
4061       case REOP_DOT:
4062         if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
4063             result = x;
4064             result->cp++;
4065         }
4066         break;
4067       case REOP_DIGIT:
4068         if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
4069             result = x;
4070             result->cp++;
4071         }
4072         break;
4073       case REOP_NONDIGIT:
4074         if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
4075             result = x;
4076             result->cp++;
4077         }
4078         break;
4079       case REOP_ALNUM:
4080         if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
4081             result = x;
4082             result->cp++;
4083         }
4084         break;
4085       case REOP_NONALNUM:
4086         if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
4087             result = x;
4088             result->cp++;
4089         }
4090         break;
4091       case REOP_SPACE:
4092         if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
4093             result = x;
4094             result->cp++;
4095         }
4096         break;
4097       case REOP_NONSPACE:
4098         if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
4099             result = x;
4100             result->cp++;
4101         }
4102         break;
4103       case REOP_BACKREF:
4104         pc = ReadCompactIndex(pc, &parenIndex);
4105         JS_ASSERT(parenIndex < gData->regexp->parenCount);
4106         result = BackrefMatcher(gData, x, parenIndex);
4107         break;
4108       case REOP_FLAT:
4109         pc = ReadCompactIndex(pc, &offset);
4110         JS_ASSERT(offset < gData->regexp->source->length());
4111         pc = ReadCompactIndex(pc, &length);
4112         JS_ASSERT(1 <= length);
4113         JS_ASSERT(length <= gData->regexp->source->length() - offset);
4114         if (length <= (size_t)(gData->cpend - x->cp)) {
4115             source = gData->regexp->source->chars() + offset;
4116             re_debug_chars(source, length);
4117             for (index = 0; index != length; index++) {
4118                 if (source[index] != x->cp[index])
4119                     return NULL;
4120             }
4121             x->cp += length;
4122             result = x;
4123         }
4124         break;
4125       case REOP_FLAT1:
4126         matchCh = *pc++;
4127         re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
4128         if (x->cp != gData->cpend && *x->cp == matchCh) {
4129             result = x;
4130             result->cp++;
4131         }
4132         break;
4133       case REOP_FLATi:
4134         pc = ReadCompactIndex(pc, &offset);
4135         JS_ASSERT(offset < gData->regexp->source->length());
4136         pc = ReadCompactIndex(pc, &length);
4137         JS_ASSERT(1 <= length);
4138         JS_ASSERT(length <= gData->regexp->source->length() - offset);
4139         source = gData->regexp->source->chars();
4140         result = FlatNIMatcher(gData, x, source + offset, length);
4141         break;
4142       case REOP_FLAT1i:
4143         matchCh = *pc++;
4144         if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
4145             result = x;
4146             result->cp++;
4147         }
4148         break;
4149       case REOP_UCFLAT1:
4150         matchCh = GET_ARG(pc);
4151         re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
4152         pc += ARG_LEN;
4153         if (x->cp != gData->cpend && *x->cp == matchCh) {
4154             result = x;
4155             result->cp++;
4156         }
4157         break;
4158       case REOP_UCFLAT1i:
4159         matchCh = GET_ARG(pc);
4160         pc += ARG_LEN;
4161         if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
4162             result = x;
4163             result->cp++;
4164         }
4165         break;
4166       case REOP_CLASS:
4167         pc = ReadCompactIndex(pc, &index);
4168         JS_ASSERT(index < gData->regexp->classCount);
4169         if (x->cp != gData->cpend) {
4170             charSet = &gData->regexp->classList[index];
4171             JS_ASSERT(charSet->converted);
4172             ch = *x->cp;
4173             index = ch >> 3;
4174             if (ch <= charSet->length &&
4175                 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
4176                 result = x;
4177                 result->cp++;
4178             }
4179         }
4180         break;
4181       case REOP_NCLASS:
4182         pc = ReadCompactIndex(pc, &index);
4183         JS_ASSERT(index < gData->regexp->classCount);
4184         if (x->cp != gData->cpend) {
4185             charSet = &gData->regexp->classList[index];
4186             JS_ASSERT(charSet->converted);
4187             ch = *x->cp;
4188             index = ch >> 3;
4189             if (ch > charSet->length ||
4190                 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
4191                 result = x;
4192                 result->cp++;
4193             }
4194         }
4195         break;
4196
4197       default:
4198         JS_ASSERT(JS_FALSE);
4199     }
4200     if (result) {
4201         if (!updatecp)
4202             x->cp = startcp;
4203         *startpc = pc;
4204         re_debug(" * ");
4205         return result;
4206     }
4207     x->cp = startcp;
4208     return NULL;
4209 }
4210
4211 static JS_ALWAYS_INLINE REMatchState *
4212 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
4213 {
4214     REMatchState *result = NULL;
4215     REBackTrackData *backTrackData;
4216     jsbytecode *nextpc, *testpc;
4217     REOp nextop;
4218     RECapture *cap;
4219     REProgState *curState;
4220     const jschar *startcp;
4221     size_t parenIndex, k;
4222     size_t parenSoFar = 0;
4223
4224     jschar matchCh1, matchCh2;
4225     RECharSet *charSet;
4226
4227     JSBool anchor;
4228     jsbytecode *pc = gData->regexp->program;
4229     REOp op = (REOp) *pc++;
4230
4231     /*
4232      * If the first node is a simple match, step the index into the string
4233      * until that match is made, or fail if it can't be found at all.
4234      */
4235     if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
4236         anchor = JS_FALSE;
4237         while (x->cp <= gData->cpend) {
4238             nextpc = pc;    /* reset back to start each time */
4239             result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
4240             if (result) {
4241                 anchor = JS_TRUE;
4242                 x = result;
4243                 pc = nextpc;    /* accept skip to next opcode */
4244                 op = (REOp) *pc++;
4245                 JS_ASSERT(op < REOP_LIMIT);
4246                 break;
4247             }
4248             gData->skipped++;
4249             x->cp++;
4250         }
4251         if (!anchor)
4252             goto bad;
4253     }
4254
4255     for (;;) {
4256 #ifdef REGEXP_DEBUG
4257         const char *opname = reop_names[op];
4258         re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
4259                  gData->stateStackTop * 2, "", opname);
4260 #endif
4261         if (REOP_IS_SIMPLE(op)) {
4262             result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
4263         } else {
4264             curState = &gData->stateStack[gData->stateStackTop];
4265             switch (op) {
4266               case REOP_END:
4267                 goto good;
4268               case REOP_ALTPREREQ2:
4269                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
4270                 pc += ARG_LEN;
4271                 matchCh2 = GET_ARG(pc);
4272                 pc += ARG_LEN;
4273                 k = GET_ARG(pc);
4274                 pc += ARG_LEN;
4275
4276                 if (x->cp != gData->cpend) {
4277                     if (*x->cp == matchCh2)
4278                         goto doAlt;
4279
4280                     charSet = &gData->regexp->classList[k];
4281                     if (!charSet->converted && !MatcherProcessCharSet(gData, charSet))
4282                         goto bad;
4283                     matchCh1 = *x->cp;
4284                     k = matchCh1 >> 3;
4285                     if ((matchCh1 > charSet->length ||
4286                          !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
4287                         charSet->sense) {
4288                         goto doAlt;
4289                     }
4290                 }
4291                 result = NULL;
4292                 break;
4293
4294               case REOP_ALTPREREQ:
4295                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
4296                 pc += ARG_LEN;
4297                 matchCh1 = GET_ARG(pc);
4298                 pc += ARG_LEN;
4299                 matchCh2 = GET_ARG(pc);
4300                 pc += ARG_LEN;
4301                 if (x->cp == gData->cpend ||
4302                     (*x->cp != matchCh1 && *x->cp != matchCh2)) {
4303                     result = NULL;
4304                     break;
4305                 }
4306                 /* else false thru... */
4307
4308               case REOP_ALT:
4309               doAlt:
4310                 nextpc = pc + GET_OFFSET(pc);   /* start of next alternate */
4311                 pc += ARG_LEN;                  /* start of this alternate */
4312                 curState->parenSoFar = parenSoFar;
4313                 PUSH_STATE_STACK(gData);
4314                 op = (REOp) *pc++;
4315                 startcp = x->cp;
4316                 if (REOP_IS_SIMPLE(op)) {
4317                     if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
4318                         op = (REOp) *nextpc++;
4319                         pc = nextpc;
4320                         continue;
4321                     }
4322                     result = x;
4323                     op = (REOp) *pc++;
4324                 }
4325                 nextop = (REOp) *nextpc++;
4326                 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
4327                     goto bad;
4328                 continue;
4329
4330               /*
4331                * Occurs at (successful) end of REOP_ALT,
4332                */
4333               case REOP_JUMP:
4334                 /*
4335                  * If we have not gotten a result here, it is because of an
4336                  * empty match.  Do the same thing REOP_EMPTY would do.
4337                  */
4338                 if (!result)
4339                     result = x;
4340
4341                 --gData->stateStackTop;
4342                 pc += GET_OFFSET(pc);
4343                 op = (REOp) *pc++;
4344                 continue;
4345
4346               /*
4347                * Occurs at last (successful) end of REOP_ALT,
4348                */
4349               case REOP_ENDALT:
4350                 /*
4351                  * If we have not gotten a result here, it is because of an
4352                  * empty match.  Do the same thing REOP_EMPTY would do.
4353                  */
4354                 if (!result)
4355                     result = x;
4356
4357                 --gData->stateStackTop;
4358                 op = (REOp) *pc++;
4359                 continue;
4360
4361               case REOP_LPAREN:
4362                 pc = ReadCompactIndex(pc, &parenIndex);
4363                 re_debug("[ %lu ]", (unsigned long) parenIndex);
4364                 JS_ASSERT(parenIndex < gData->regexp->parenCount);
4365                 if (parenIndex + 1 > parenSoFar)
4366                     parenSoFar = parenIndex + 1;
4367                 x->parens[parenIndex].index = x->cp - gData->cpbegin;
4368                 x->parens[parenIndex].length = 0;
4369                 op = (REOp) *pc++;
4370                 continue;
4371
4372               case REOP_RPAREN:
4373               {
4374                 ptrdiff_t delta;
4375
4376                 pc = ReadCompactIndex(pc, &parenIndex);
4377                 JS_ASSERT(parenIndex < gData->regexp->parenCount);
4378                 cap = &x->parens[parenIndex];
4379                 delta = x->cp - (gData->cpbegin + cap->index);
4380                 cap->length = (delta < 0) ? 0 : (size_t) delta;
4381                 op = (REOp) *pc++;
4382
4383                 if (!result)
4384                     result = x;
4385                 continue;
4386               }
4387               case REOP_ASSERT:
4388                 nextpc = pc + GET_OFFSET(pc);  /* start of term after ASSERT */
4389                 pc += ARG_LEN;                 /* start of ASSERT child */
4390                 op = (REOp) *pc++;
4391                 testpc = pc;
4392                 if (REOP_IS_SIMPLE(op) &&
4393                     !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
4394                     result = NULL;
4395                     break;
4396                 }
4397                 curState->u.assertion.top =
4398                     (char *)gData->backTrackSP - (char *)gData->backTrackStack;
4399                 curState->u.assertion.sz = gData->cursz;
4400                 curState->index = x->cp - gData->cpbegin;
4401                 curState->parenSoFar = parenSoFar;
4402                 PUSH_STATE_STACK(gData);
4403                 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
4404                                         nextpc, x, x->cp, 0, 0)) {
4405                     goto bad;
4406                 }
4407                 continue;
4408
4409               case REOP_ASSERT_NOT:
4410                 nextpc = pc + GET_OFFSET(pc);
4411                 pc += ARG_LEN;
4412                 op = (REOp) *pc++;
4413                 testpc = pc;
4414                 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
4415                     SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
4416                     *testpc == REOP_ASSERTNOTTEST) {
4417                     result = NULL;
4418                     break;
4419                 }
4420                 curState->u.assertion.top
4421                     = (char *)gData->backTrackSP -
4422                       (char *)gData->backTrackStack;
4423                 curState->u.assertion.sz = gData->cursz;
4424                 curState->index = x->cp - gData->cpbegin;
4425                 curState->parenSoFar = parenSoFar;
4426                 PUSH_STATE_STACK(gData);
4427                 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
4428                                         nextpc, x, x->cp, 0, 0)) {
4429                     goto bad;
4430                 }
4431                 continue;
4432
4433               case REOP_ASSERTTEST:
4434                 --gData->stateStackTop;
4435                 --curState;
4436                 x->cp = gData->cpbegin + curState->index;
4437                 gData->backTrackSP =
4438                     (REBackTrackData *) ((char *)gData->backTrackStack +
4439                                          curState->u.assertion.top);
4440                 gData->cursz = curState->u.assertion.sz;
4441                 if (result)
4442                     result = x;
4443                 break;
4444
4445               case REOP_ASSERTNOTTEST:
4446                 --gData->stateStackTop;
4447                 --curState;
4448                 x->cp = gData->cpbegin + curState->index;
4449                 gData->backTrackSP =
4450                     (REBackTrackData *) ((char *)gData->backTrackStack +
4451                                          curState->u.assertion.top);
4452                 gData->cursz = curState->u.assertion.sz;
4453                 result = (!result) ? x : NULL;
4454                 break;
4455               case REOP_STAR:
4456                 curState->u.quantifier.min = 0;
4457                 curState->u.quantifier.max = (uintN)-1;
4458                 goto quantcommon;
4459               case REOP_PLUS:
4460                 curState->u.quantifier.min = 1;
4461                 curState->u.quantifier.max = (uintN)-1;
4462                 goto quantcommon;
4463               case REOP_OPT:
4464                 curState->u.quantifier.min = 0;
4465                 curState->u.quantifier.max = 1;
4466                 goto quantcommon;
4467               case REOP_QUANT:
4468                 pc = ReadCompactIndex(pc, &k);
4469                 curState->u.quantifier.min = k;
4470                 pc = ReadCompactIndex(pc, &k);
4471                 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
4472                 curState->u.quantifier.max = k - 1;
4473                 JS_ASSERT(curState->u.quantifier.min
4474                           <= curState->u.quantifier.max);
4475               quantcommon:
4476                 if (curState->u.quantifier.max == 0) {
4477                     pc = pc + GET_OFFSET(pc);
4478                     op = (REOp) *pc++;
4479                     result = x;
4480                     continue;
4481                 }
4482                 /* Step over <next> */
4483                 nextpc = pc + ARG_LEN;
4484                 op = (REOp) *nextpc++;
4485                 startcp = x->cp;
4486                 if (REOP_IS_SIMPLE(op)) {
4487                     if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
4488                         if (curState->u.quantifier.min == 0)
4489                             result = x;
4490                         else
4491                             result = NULL;
4492                         pc = pc + GET_OFFSET(pc);
4493                         break;
4494                     }
4495                     op = (REOp) *nextpc++;
4496                     result = x;
4497                 }
4498                 curState->index = startcp - gData->cpbegin;
4499                 curState->continue_op = REOP_REPEAT;
4500                 curState->continue_pc = pc;
4501                 curState->parenSoFar = parenSoFar;
4502                 PUSH_STATE_STACK(gData);
4503                 if (curState->u.quantifier.min == 0 &&
4504                     !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
4505                                         0, 0)) {
4506                     goto bad;
4507                 }
4508                 pc = nextpc;
4509                 continue;
4510
4511               case REOP_ENDCHILD: /* marks the end of a quantifier child */
4512                 pc = curState[-1].continue_pc;
4513                 op = (REOp) curState[-1].continue_op;
4514
4515                 if (!result)
4516                     result = x;
4517                 continue;
4518
4519               case REOP_REPEAT:
4520                 --curState;
4521                 do {
4522                     --gData->stateStackTop;
4523                     if (!result) {
4524                         /* Failed, see if we have enough children. */
4525                         if (curState->u.quantifier.min == 0)
4526                             goto repeatDone;
4527                         goto break_switch;
4528                     }
4529                     if (curState->u.quantifier.min == 0 &&
4530                         x->cp == gData->cpbegin + curState->index) {
4531                         /* matched an empty string, that'll get us nowhere */
4532                         result = NULL;
4533                         goto break_switch;
4534                     }
4535                     if (curState->u.quantifier.min != 0)
4536                         curState->u.quantifier.min--;
4537                     if (curState->u.quantifier.max != (uintN) -1)
4538                         curState->u.quantifier.max--;
4539                     if (curState->u.quantifier.max == 0)
4540                         goto repeatDone;
4541                     nextpc = pc + ARG_LEN;
4542                     nextop = (REOp) *nextpc;
4543                     startcp = x->cp;
4544                     if (REOP_IS_SIMPLE(nextop)) {
4545                         nextpc++;
4546                         if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
4547                             if (curState->u.quantifier.min == 0)
4548                                 goto repeatDone;
4549                             result = NULL;
4550                             goto break_switch;
4551                         }
4552                         result = x;
4553                     }
4554                     curState->index = startcp - gData->cpbegin;
4555                     PUSH_STATE_STACK(gData);
4556                     if (curState->u.quantifier.min == 0 &&
4557                         !PushBackTrackState(gData, REOP_REPEAT,
4558                                             pc, x, startcp,
4559                                             curState->parenSoFar,
4560                                             parenSoFar -
4561                                             curState->parenSoFar)) {
4562                         goto bad;
4563                     }
4564                 } while (*nextpc == REOP_ENDCHILD);
4565                 pc = nextpc;
4566                 op = (REOp) *pc++;
4567                 parenSoFar = curState->parenSoFar;
4568                 continue;
4569
4570               repeatDone:
4571                 result = x;
4572                 pc += GET_OFFSET(pc);
4573                 goto break_switch;
4574
4575               case REOP_MINIMALSTAR:
4576                 curState->u.quantifier.min = 0;
4577                 curState->u.quantifier.max = (uintN)-1;
4578                 goto minimalquantcommon;
4579               case REOP_MINIMALPLUS:
4580                 curState->u.quantifier.min = 1;
4581                 curState->u.quantifier.max = (uintN)-1;
4582                 goto minimalquantcommon;
4583               case REOP_MINIMALOPT:
4584                 curState->u.quantifier.min = 0;
4585                 curState->u.quantifier.max = 1;
4586                 goto minimalquantcommon;
4587               case REOP_MINIMALQUANT:
4588                 pc = ReadCompactIndex(pc, &k);
4589                 curState->u.quantifier.min = k;
4590                 pc = ReadCompactIndex(pc, &k);
4591                 /* See REOP_QUANT comments about k - 1. */
4592                 curState->u.quantifier.max = k - 1;
4593                 JS_ASSERT(curState->u.quantifier.min
4594                           <= curState->u.quantifier.max);
4595               minimalquantcommon:
4596                 curState->index = x->cp - gData->cpbegin;
4597                 curState->parenSoFar = parenSoFar;
4598                 PUSH_STATE_STACK(gData);
4599                 if (curState->u.quantifier.min != 0) {
4600                     curState->continue_op = REOP_MINIMALREPEAT;
4601                     curState->continue_pc = pc;
4602                     /* step over <next> */
4603                     pc += OFFSET_LEN;
4604                     op = (REOp) *pc++;
4605                 } else {
4606                     if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
4607                                             pc, x, x->cp, 0, 0)) {
4608                         goto bad;
4609                     }
4610                     --gData->stateStackTop;
4611                     pc = pc + GET_OFFSET(pc);
4612                     op = (REOp) *pc++;
4613                 }
4614                 continue;
4615
4616               case REOP_MINIMALREPEAT:
4617                 --gData->stateStackTop;
4618                 --curState;
4619
4620                 re_debug("{%d,%d}", curState->u.quantifier.min,
4621                          curState->u.quantifier.max);
4622 #define PREPARE_REPEAT()                                                      \
4623     JS_BEGIN_MACRO                                                            \
4624         curState->index = x->cp - gData->cpbegin;                             \
4625         curState->continue_op = REOP_MINIMALREPEAT;                           \
4626         curState->continue_pc = pc;                                           \
4627         pc += ARG_LEN;                                                        \
4628         for (k = curState->parenSoFar; k < parenSoFar; k++)                   \
4629             x->parens[k].index = -1;                                          \
4630         PUSH_STATE_STACK(gData);                                              \
4631         op = (REOp) *pc++;                                                    \
4632         JS_ASSERT(op < REOP_LIMIT);                                           \
4633     JS_END_MACRO
4634
4635                 if (!result) {
4636                     re_debug(" - ");
4637                     /*
4638                      * Non-greedy failure - try to consume another child.
4639                      */
4640                     if (curState->u.quantifier.max == (uintN) -1 ||
4641                         curState->u.quantifier.max > 0) {
4642                         PREPARE_REPEAT();
4643                         continue;
4644                     }
4645                     /* Don't need to adjust pc since we're going to pop. */
4646                     break;
4647                 }
4648                 if (curState->u.quantifier.min == 0 &&
4649                     x->cp == gData->cpbegin + curState->index) {
4650                     /* Matched an empty string, that'll get us nowhere. */
4651                     result = NULL;
4652                     break;
4653                 }
4654                 if (curState->u.quantifier.min != 0)
4655                     curState->u.quantifier.min--;
4656                 if (curState->u.quantifier.max != (uintN) -1)
4657                     curState->u.quantifier.max--;
4658                 if (curState->u.quantifier.min != 0) {
4659                     PREPARE_REPEAT();
4660                     continue;
4661                 }
4662                 curState->index = x->cp - gData->cpbegin;
4663                 curState->parenSoFar = parenSoFar;
4664                 PUSH_STATE_STACK(gData);
4665                 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
4666                                         pc, x, x->cp,
4667                                         curState->parenSoFar,
4668                                         parenSoFar - curState->parenSoFar)) {
4669                     goto bad;
4670                 }
4671                 --gData->stateStackTop;
4672                 pc = pc + GET_OFFSET(pc);
4673                 op = (REOp) *pc++;
4674                 JS_ASSERT(op < REOP_LIMIT);
4675                 continue;
4676               default:
4677                 JS_ASSERT(JS_FALSE);
4678                 result = NULL;
4679             }
4680           break_switch:;
4681         }
4682
4683         /*
4684          *  If the match failed and there's a backtrack option, take it.
4685          *  Otherwise this is a complete and utter failure.
4686          */
4687         if (!result) {
4688             if (gData->cursz == 0)
4689                 return NULL;
4690             if (!JS_CHECK_OPERATION_LIMIT(gData->cx)) {
4691                 gData->ok = JS_FALSE;
4692                 return NULL;
4693             }
4694
4695             /* Potentially detect explosive regex here. */
4696             gData->backTrackCount++;
4697             if (gData->backTrackLimit &&
4698                 gData->backTrackCount >= gData->backTrackLimit) {
4699                 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
4700                                      JSMSG_REGEXP_TOO_COMPLEX);
4701                 gData->ok = JS_FALSE;
4702                 return NULL;
4703             }
4704
4705             backTrackData = gData->backTrackSP;
4706             gData->cursz = backTrackData->sz;
4707             gData->backTrackSP =
4708                 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
4709             x->cp = backTrackData->cp;
4710             pc = backTrackData->backtrack_pc;
4711             op = (REOp) backTrackData->backtrack_op;
4712             JS_ASSERT(op < REOP_LIMIT);
4713             gData->stateStackTop = backTrackData->saveStateStackTop;
4714             JS_ASSERT(gData->stateStackTop);
4715
4716             memcpy(gData->stateStack, backTrackData + 1,
4717                    sizeof(REProgState) * backTrackData->saveStateStackTop);
4718             curState = &gData->stateStack[gData->stateStackTop - 1];
4719
4720             if (backTrackData->parenCount) {
4721                 memcpy(&x->parens[backTrackData->parenIndex],
4722                        (char *)(backTrackData + 1) +
4723                        sizeof(REProgState) * backTrackData->saveStateStackTop,
4724                        sizeof(RECapture) * backTrackData->parenCount);
4725                 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
4726             } else {
4727                 for (k = curState->parenSoFar; k < parenSoFar; k++)
4728                     x->parens[k].index = -1;
4729                 parenSoFar = curState->parenSoFar;
4730             }
4731
4732             re_debug("\tBT_Pop: %ld,%ld",
4733                      (unsigned long) backTrackData->parenIndex,
4734                      (unsigned long) backTrackData->parenCount);
4735             continue;
4736         }
4737         x = result;
4738
4739         /*
4740          *  Continue with the expression.
4741          */
4742         op = (REOp)*pc++;
4743         JS_ASSERT(op < REOP_LIMIT);
4744     }
4745
4746 bad:
4747     re_debug("\n");
4748     return NULL;
4749
4750 good:
4751     re_debug("\n");
4752     return x;
4753 }
4754
4755 static REMatchState *
4756 MatchRegExp(REGlobalData *gData, REMatchState *x)
4757 {
4758     const jschar *cpOrig = x->cp;
4759
4760 #ifdef JS_TRACER
4761     NativeRegExp native;
4762
4763     /* Run with native regexp if possible. */
4764     if (TRACING_ENABLED(gData->cx) &&
4765         !(gData->regexp->flags & JSREG_NOCOMPILE) &&
4766         (native = GetNativeRegExp(gData->cx, gData->regexp))) {
4767
4768         /*
4769          * For efficient native execution, store offset as a direct pointer into
4770          * the buffer and convert back after execution finishes.
4771          */
4772         gData->skipped = (ptrdiff_t)cpOrig;
4773
4774 #ifdef JS_JIT_SPEW
4775         debug_only_stmt({
4776             VOUCH_DOES_NOT_REQUIRE_STACK();
4777             JSStackFrame *caller = (JS_ON_TRACE(gData->cx))
4778                                    ? NULL
4779                                    : js_GetScriptedCaller(gData->cx, NULL);
4780             debug_only_printf(LC_TMRegexp,
4781                               "entering REGEXP trace at %s:%u@%u, code: %p\n",
4782                               caller ? caller->script->filename : "<unknown>",
4783                               caller ? js_FramePCToLineNumber(gData->cx, caller) : 0,
4784                               caller ? FramePCOffset(caller) : 0,
4785                               JS_FUNC_TO_DATA_PTR(void *, native));
4786         })
4787 #endif
4788
4789         void *result;
4790 #if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32)
4791         /*
4792          * Although a NativeRegExp takes one argument and SIMULATE_FASTCALL is
4793          * passing two, the second goes into 'edx' and can safely be ignored.
4794          */
4795         SIMULATE_FASTCALL(result, gData, gData->cpend, native);
4796 #else
4797         result = native(gData, gData->cpend);
4798 #endif
4799         debug_only_print0(LC_TMRegexp, "leaving REGEXP trace\n");
4800         if (!result)
4801             return NULL;
4802
4803         /* Restore REGlobalData::skipped and fill REMatchState. */
4804         x->cp = (const jschar *)gData->stateStack;
4805         gData->skipped = (const jschar *)gData->skipped - cpOrig;
4806         return x;
4807     }
4808 #endif
4809
4810     /*
4811      * Have to include the position beyond the last character
4812      * in order to detect end-of-input/line condition.
4813      */
4814     for (const jschar *p = cpOrig; p <= gData->cpend; p++) {
4815         gData->skipped = p - cpOrig;
4816         x->cp = p;
4817         for (uintN j = 0; j < gData->regexp->parenCount; j++)
4818             x->parens[j].index = -1;
4819         REMatchState *result = ExecuteREBytecode(gData, x);
4820         if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
4821             return result;
4822         gData->backTrackSP = gData->backTrackStack;
4823         gData->cursz = 0;
4824         gData->stateStackTop = 0;
4825         p = cpOrig + gData->skipped;
4826     }
4827     return NULL;
4828 }
4829
4830 #define MIN_BACKTRACK_LIMIT 400000
4831
4832 static REMatchState *
4833 InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re, size_t length)
4834 {
4835     REMatchState *result;
4836     uintN i;
4837
4838     gData->backTrackStackSize = INITIAL_BACKTRACK;
4839     JSArenaPool &regexpPool = cx->regexpPool;
4840     regexpPool.allocateCast<REBackTrackData *>(gData->backTrackStack, INITIAL_BACKTRACK);
4841     if (!gData->backTrackStack)
4842         goto bad;
4843
4844     gData->backTrackSP = gData->backTrackStack;
4845     gData->cursz = 0;
4846     gData->backTrackCount = 0;
4847     gData->backTrackLimit = 0;
4848     if (JS_GetOptions(cx) & JSOPTION_RELIMIT) {
4849         gData->backTrackLimit = length * length * length; /* O(n^3) */
4850         if (gData->backTrackLimit < MIN_BACKTRACK_LIMIT)
4851             gData->backTrackLimit = MIN_BACKTRACK_LIMIT;
4852     }
4853
4854     gData->stateStackLimit = INITIAL_STATESTACK;
4855     regexpPool.allocateCast<REProgState *>(gData->stateStack,
4856                                            sizeof(REProgState) * INITIAL_STATESTACK);
4857     if (!gData->stateStack)
4858         goto bad;
4859
4860     gData->stateStackTop = 0;
4861     gData->cx = cx;
4862     gData->regexp = re;
4863     gData->ok = JS_TRUE;
4864
4865     regexpPool.allocateCast<REMatchState *>(result, offsetof(REMatchState, parens)
4866                                             + re->parenCount * sizeof(RECapture));
4867     if (!result)
4868         goto bad;
4869
4870     for (i = 0; i < re->classCount; i++) {
4871         if (!re->classList[i].converted &&
4872             !MatcherProcessCharSet(gData, &re->classList[i])) {
4873             return NULL;
4874         }
4875     }
4876
4877     return result;
4878
4879 bad:
4880     js_ReportOutOfScriptQuota(cx);
4881     gData->ok = JS_FALSE;
4882     return NULL;
4883 }
4884
4885 JSBool
4886 js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
4887                  JSBool test, jsval *rval)
4888 {
4889     REGlobalData gData;
4890     REMatchState *x, *result;
4891
4892     const jschar *cp, *ep;
4893     size_t i, length, start;
4894     JSBool ok;
4895     JSRegExpStatics *res;
4896     ptrdiff_t matchlen;
4897     uintN num;
4898     JSString *parstr, *matchstr;
4899     JSObject *obj;
4900
4901     RECapture *parsub = NULL;
4902     void *mark;
4903     int64 *timestamp;
4904
4905     /*
4906      * It's safe to load from cp because JSStrings have a zero at the end,
4907      * and we never let cp get beyond cpend.
4908      */
4909     start = *indexp;
4910     str->getCharsAndLength(cp, length);
4911     if (start > length)
4912         start = length;
4913     gData.cpbegin = cp;
4914     gData.cpend = cp + length;
4915     cp += start;
4916     gData.start = start;
4917     gData.skipped = 0;
4918
4919     if (!cx->regexpPool.getSecond()) {
4920         /*
4921          * The first arena in the regexpPool must have a timestamp at its base.
4922          */
4923         cx->regexpPool.allocateCast<int64 *>(timestamp, sizeof *timestamp);
4924         if (!timestamp)
4925             return JS_FALSE;
4926         *timestamp = JS_Now();
4927     }
4928     mark = cx->regexpPool.getMark();
4929
4930     x = InitMatch(cx, &gData, re, length);
4931
4932     if (!x) {
4933         ok = JS_FALSE;
4934         goto out;
4935     }
4936     x->cp = cp;
4937
4938     /*
4939      * Call the recursive matcher to do the real work.  Return null on mismatch
4940      * whether testing or not.  On match, return an extended Array object.
4941      */
4942     result = MatchRegExp(&gData, x);
4943     ok = gData.ok;
4944     if (!ok)
4945         goto out;
4946     if (!result) {
4947         *rval = JSVAL_NULL;
4948         goto out;
4949     }
4950     cp = result->cp;
4951     i = cp - gData.cpbegin;
4952     *indexp = i;
4953     matchlen = i - (start + gData.skipped);
4954     JS_ASSERT(matchlen >= 0);
4955     ep = cp;
4956     cp -= matchlen;
4957
4958     if (test) {
4959         /*
4960          * Testing for a match and updating cx->regExpStatics: don't allocate
4961          * an array object, do return true.
4962          */
4963         *rval = JSVAL_TRUE;
4964
4965         /* Avoid warning.  (gcc doesn't detect that obj is needed iff !test); */
4966         obj = NULL;
4967     } else {
4968         /*
4969          * The array returned on match has element 0 bound to the matched
4970          * string, elements 1 through state.parenCount bound to the paren
4971          * matches, an index property telling the length of the left context,
4972          * and an input property referring to the input string.
4973          */
4974         obj = js_NewSlowArrayObject(cx);
4975         if (!obj) {
4976             ok = JS_FALSE;
4977             goto out;
4978         }
4979         *rval = OBJECT_TO_JSVAL(obj);
4980
4981 #define DEFVAL(val, id) {                                                     \
4982     ok = js_DefineProperty(cx, obj, id, val,                                  \
4983                            JS_PropertyStub, JS_PropertyStub,                  \
4984                            JSPROP_ENUMERATE);                                 \
4985     if (!ok)                                                                  \
4986         goto out;                                                             \
4987 }
4988
4989         matchstr = js_NewDependentString(cx, str, cp - str->chars(),
4990                                          matchlen);
4991         if (!matchstr) {
4992             ok = JS_FALSE;
4993             goto out;
4994         }
4995         DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
4996     }
4997
4998     res = &cx->regExpStatics;
4999     res->input = str;
5000     if (!res->parens.resize(re->parenCount)) {
5001         ok = JS_FALSE;
5002         goto out;
5003     }
5004     if (re->parenCount == 0) {
5005         res->lastParen = js_EmptySubString;
5006     } else {
5007         for (num = 0; num < re->parenCount; num++) {
5008             JSSubString *sub = &res->parens[num];
5009             parsub = &result->parens[num];
5010             if (parsub->index == -1) {
5011                 sub->chars = NULL;
5012                 sub->length = 0;
5013             } else {
5014                 sub->chars = gData.cpbegin + parsub->index;
5015                 sub->length = parsub->length;
5016             }
5017             if (test)
5018                 continue;
5019             if (parsub->index == -1) {
5020                 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1), JSVAL_VOID, NULL, NULL,
5021                                        JSPROP_ENUMERATE);
5022             } else {
5023                 parstr = js_NewDependentString(cx, str,
5024                                                gData.cpbegin + parsub->index -
5025                                                str->chars(),
5026                                                parsub->length);
5027                 if (!parstr) {
5028                     ok = JS_FALSE;
5029                     goto out;
5030                 }
5031                 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1), STRING_TO_JSVAL(parstr),
5032                                        NULL, NULL, JSPROP_ENUMERATE);
5033             }
5034             if (!ok)
5035                 goto out;
5036         }
5037         if (parsub->index == -1) {
5038             res->lastParen = js_EmptySubString;
5039         } else {
5040             res->lastParen.chars = gData.cpbegin + parsub->index;
5041             res->lastParen.length = parsub->length;
5042         }
5043     }
5044
5045     if (!test) {
5046         /*
5047          * Define the index and input properties last for better for/in loop
5048          * order (so they come after the elements).
5049          */
5050         DEFVAL(INT_TO_JSVAL(start + gData.skipped),
5051                ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
5052         DEFVAL(STRING_TO_JSVAL(str),
5053                ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
5054     }
5055
5056 #undef DEFVAL
5057
5058     res->lastMatch.chars = cp;
5059     res->lastMatch.length = matchlen;
5060
5061     /*
5062      * For JS1.3 and ECMAv2, emulate Perl5 exactly:
5063      *
5064      * js1.3        "hi", "hi there"            "hihitherehi therebye"
5065      */
5066     res->leftContext.chars = str->chars();
5067     res->leftContext.length = start + gData.skipped;
5068     res->rightContext.chars = ep;
5069     res->rightContext.length = gData.cpend - ep;
5070
5071 out:
5072     cx->regexpPool.release(mark);
5073     return ok;
5074 }
5075
5076 /************************************************************************/
5077
5078 static JSBool
5079 SetRegExpLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
5080 {
5081     JS_ASSERT(obj->isRegExp());
5082     return JS_NewNumberValue(cx, lastIndex, obj->addressOfRegExpLastIndex());
5083 }
5084
5085 static JSBool
5086 regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
5087 {
5088     jsint slot;
5089     JSRegExp *re;
5090
5091     if (!JSVAL_IS_INT(id))
5092         return JS_TRUE;
5093     while (obj->getClass() != &js_RegExpClass) {
5094         obj = obj->getProto();
5095         if (!obj)
5096             return JS_TRUE;
5097     }
5098     slot = JSVAL_TO_INT(id);
5099     if (slot == REGEXP_LAST_INDEX) {
5100         *vp = obj->getRegExpLastIndex();
5101         return JS_TRUE;
5102     }
5103
5104     JS_LOCK_OBJ(cx, obj);
5105     re = (JSRegExp *) obj->getPrivate();
5106     if (re) {
5107         switch (slot) {
5108           case REGEXP_SOURCE:
5109             *vp = STRING_TO_JSVAL(re->source);
5110             break;
5111           case REGEXP_GLOBAL:
5112             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
5113             break;
5114           case REGEXP_IGNORE_CASE:
5115             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
5116             break;
5117           case REGEXP_MULTILINE:
5118             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
5119             break;
5120           case REGEXP_STICKY:
5121             *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_STICKY) != 0);
5122             break;
5123         }
5124     }
5125     JS_UNLOCK_OBJ(cx, obj);
5126     return JS_TRUE;
5127 }
5128
5129 static JSBool
5130 regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
5131 {
5132     JSBool ok;
5133     jsint slot;
5134     jsdouble lastIndex;
5135
5136     ok = JS_TRUE;
5137     if (!JSVAL_IS_INT(id))
5138         return ok;
5139     while (obj->getClass() != &js_RegExpClass) {
5140         obj = obj->getProto();
5141         if (!obj)
5142             return JS_TRUE;
5143     }
5144     slot = JSVAL_TO_INT(id);
5145     if (slot == REGEXP_LAST_INDEX) {
5146         if (!JS_ValueToNumber(cx, *vp, &lastIndex))
5147             return JS_FALSE;
5148         lastIndex = js_DoubleToInteger(lastIndex);
5149         ok = SetRegExpLastIndex(cx, obj, lastIndex);
5150     }
5151     return ok;
5152 }
5153
5154 #define REGEXP_PROP_ATTRS     (JSPROP_PERMANENT | JSPROP_SHARED)
5155 #define RO_REGEXP_PROP_ATTRS  (REGEXP_PROP_ATTRS | JSPROP_READONLY)
5156
5157 #define G regexp_getProperty
5158 #define S regexp_setProperty
5159
5160 static JSPropertySpec regexp_props[] = {
5161     {"source",     REGEXP_SOURCE,      RO_REGEXP_PROP_ATTRS,G,S},
5162     {"global",     REGEXP_GLOBAL,      RO_REGEXP_PROP_ATTRS,G,S},
5163     {"ignoreCase", REGEXP_IGNORE_CASE, RO_REGEXP_PROP_ATTRS,G,S},
5164     {"lastIndex",  REGEXP_LAST_INDEX,  REGEXP_PROP_ATTRS,G,S},
5165     {"multiline",  REGEXP_MULTILINE,   RO_REGEXP_PROP_ATTRS,G,S},
5166     {"sticky",     REGEXP_STICKY,      RO_REGEXP_PROP_ATTRS,G,S},
5167     {0,0,0,0,0}
5168 };
5169
5170 #undef G
5171 #undef S
5172
5173 /*
5174  * RegExp class static properties and their Perl counterparts:
5175  *
5176  *  RegExp.input                $_
5177  *  RegExp.multiline            $*
5178  *  RegExp.lastMatch            $&
5179  *  RegExp.lastParen            $+
5180  *  RegExp.leftContext          $`
5181  *  RegExp.rightContext         $'
5182  */
5183 enum regexp_static_tinyid {
5184     REGEXP_STATIC_INPUT         = -1,
5185     REGEXP_STATIC_MULTILINE     = -2,
5186     REGEXP_STATIC_LAST_MATCH    = -3,
5187     REGEXP_STATIC_LAST_PAREN    = -4,
5188     REGEXP_STATIC_LEFT_CONTEXT  = -5,
5189     REGEXP_STATIC_RIGHT_CONTEXT = -6
5190 };
5191
5192 void
5193 js_InitRegExpStatics(JSContext *cx)
5194 {
5195     /*
5196      * To avoid multiple allocations in InitMatch(), the arena size parameter
5197      * should be at least as big as:
5198      *   INITIAL_BACKTRACK
5199      *   + (sizeof(REProgState) * INITIAL_STATESTACK)
5200      *   + (offsetof(REMatchState, parens) + avgParanSize * sizeof(RECapture))
5201      */
5202     cx->regexpPool.init("regexp", 12 * 1024 - 40,  /* FIXME: bug 421435 */
5203                         sizeof(void *), &cx->scriptStackQuota);
5204
5205     JS_ClearRegExpStatics(cx);
5206 }
5207
5208 JS_FRIEND_API(void)
5209 js_SaveAndClearRegExpStatics(JSContext *cx, JSRegExpStatics *statics,
5210                              AutoValueRooter *tvr)
5211 {
5212     statics->copy(cx->regExpStatics);
5213     if (statics->input)
5214         tvr->setString(statics->input);
5215     JS_ClearRegExpStatics(cx);
5216 }
5217
5218 JS_FRIEND_API(void)
5219 js_RestoreRegExpStatics(JSContext *cx, JSRegExpStatics *statics,
5220                         AutoValueRooter *tvr)
5221 {
5222     /* Clear/free any new JSRegExpStatics data before clobbering. */
5223     cx->regExpStatics.copy(*statics);
5224 }
5225
5226 void
5227 js_TraceRegExpStatics(JSTracer *trc, JSContext *acx)
5228 {
5229     JSRegExpStatics *res = &acx->regExpStatics;
5230
5231     if (res->input)
5232         JS_CALL_STRING_TRACER(trc, res->input, "res->input");
5233 }
5234
5235 void
5236 js_FreeRegExpStatics(JSContext *cx)
5237 {
5238     JS_ClearRegExpStatics(cx);
5239     cx->regexpPool.finish();
5240 }
5241
5242 static JSBool
5243 regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
5244 {
5245     jsint slot;
5246     JSRegExpStatics *res;
5247     JSString *str;
5248     JSSubString *sub;
5249
5250     res = &cx->regExpStatics;
5251     if (!JSVAL_IS_INT(id))
5252         return JS_TRUE;
5253     slot = JSVAL_TO_INT(id);
5254     switch (slot) {
5255       case REGEXP_STATIC_INPUT:
5256         *vp = res->input ? STRING_TO_JSVAL(res->input)
5257                          : JS_GetEmptyStringValue(cx);
5258         return JS_TRUE;
5259       case REGEXP_STATIC_MULTILINE:
5260         *vp = BOOLEAN_TO_JSVAL(res->multiline);
5261         return JS_TRUE;
5262       case REGEXP_STATIC_LAST_MATCH:
5263         sub = &res->lastMatch;
5264         break;
5265       case REGEXP_STATIC_LAST_PAREN:
5266         sub = &res->lastParen;
5267         break;
5268       case REGEXP_STATIC_LEFT_CONTEXT:
5269         sub = &res->leftContext;
5270         break;
5271       case REGEXP_STATIC_RIGHT_CONTEXT:
5272         sub = &res->rightContext;
5273         break;
5274       default:
5275         sub = (size_t(slot) < res->parens.length()) ? &res->parens[slot] : &js_EmptySubString;
5276         break;
5277     }
5278     str = js_NewStringCopyN(cx, sub->chars, sub->length);
5279     if (!str)
5280         return JS_FALSE;
5281     *vp = STRING_TO_JSVAL(str);
5282     return JS_TRUE;
5283 }
5284
5285 static JSBool
5286 regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
5287 {
5288     JSRegExpStatics *res;
5289
5290     if (!JSVAL_IS_INT(id))
5291         return JS_TRUE;
5292     res = &cx->regExpStatics;
5293     /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
5294     if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
5295         if (!JSVAL_IS_STRING(*vp) &&
5296             !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
5297             return JS_FALSE;
5298         }
5299         res->input = JSVAL_TO_STRING(*vp);
5300     } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
5301         if (!JSVAL_IS_BOOLEAN(*vp) &&
5302             !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
5303             return JS_FALSE;
5304         }
5305         res->multiline = JSVAL_TO_BOOLEAN(*vp);
5306     }
5307     return JS_TRUE;
5308 }
5309 #define REGEXP_STATIC_PROP_ATTRS    (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
5310 #define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)
5311
5312 static JSPropertySpec regexp_static_props[] = {
5313     {"input",
5314      REGEXP_STATIC_INPUT,
5315      REGEXP_STATIC_PROP_ATTRS,
5316      regexp_static_getProperty,    regexp_static_setProperty},
5317     {"multiline",
5318      REGEXP_STATIC_MULTILINE,
5319      REGEXP_STATIC_PROP_ATTRS,
5320      regexp_static_getProperty,    regexp_static_setProperty},
5321     {"lastMatch",
5322      REGEXP_STATIC_LAST_MATCH,
5323      RO_REGEXP_STATIC_PROP_ATTRS,
5324      regexp_static_getProperty,    regexp_static_getProperty},
5325     {"lastParen",
5326      REGEXP_STATIC_LAST_PAREN,
5327      RO_REGEXP_STATIC_PROP_ATTRS,
5328      regexp_static_getProperty,    regexp_static_getProperty},
5329     {"leftContext",
5330      REGEXP_STATIC_LEFT_CONTEXT,
5331      RO_REGEXP_STATIC_PROP_ATTRS,
5332      regexp_static_getProperty,    regexp_static_getProperty},
5333     {"rightContext",
5334      REGEXP_STATIC_RIGHT_CONTEXT,
5335      RO_REGEXP_STATIC_PROP_ATTRS,
5336      regexp_static_getProperty,    regexp_static_getProperty},
5337
5338     /* XXX should have block scope and local $1, etc. */
5339     {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS,
5340      regexp_static_getProperty,    regexp_static_getProperty},
5341     {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS,
5342      regexp_static_getProperty,    regexp_static_getProperty},
5343     {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS,
5344      regexp_static_getProperty,    regexp_static_getProperty},
5345     {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS,
5346      regexp_static_getProperty,    regexp_static_getProperty},
5347     {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS,
5348      regexp_static_getProperty,    regexp_static_getProperty},
5349     {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS,
5350      regexp_static_getProperty,    regexp_static_getProperty},
5351     {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS,
5352      regexp_static_getProperty,    regexp_static_getProperty},
5353     {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS,
5354      regexp_static_getProperty,    regexp_static_getProperty},
5355     {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS,
5356      regexp_static_getProperty,    regexp_static_getProperty},
5357
5358     {0,0,0,0,0}
5359 };
5360
5361 static void
5362 regexp_finalize(JSContext *cx, JSObject *obj)
5363 {
5364     JSRegExp *re = (JSRegExp *) obj->getPrivate();
5365     if (!re)
5366         return;
5367     js_DestroyRegExp(cx, re);
5368 }
5369
5370 /* Forward static prototype. */
5371 static JSBool
5372 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
5373                 JSBool test, jsval *rval);
5374
5375 static JSBool
5376 regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
5377 {
5378     return regexp_exec_sub(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv,
5379                            JS_FALSE, rval);
5380 }
5381
5382 #if JS_HAS_XDR
5383
5384 #include "jsxdrapi.h"
5385
5386 JSBool
5387 js_XDRRegExpObject(JSXDRState *xdr, JSObject **objp)
5388 {
5389     JSRegExp *re;
5390     JSString *source;
5391     uint32 flagsword;
5392     JSObject *obj;
5393
5394     if (xdr->mode == JSXDR_ENCODE) {
5395         re = (JSRegExp *) (*objp)->getPrivate();
5396         if (!re)
5397             return JS_FALSE;
5398         source = re->source;
5399         flagsword = (uint32)re->flags;
5400     }
5401     if (!JS_XDRString(xdr, &source) ||
5402         !JS_XDRUint32(xdr, &flagsword)) {
5403         return JS_FALSE;
5404     }
5405     if (xdr->mode == JSXDR_DECODE) {
5406         obj = NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
5407         if (!obj)
5408             return JS_FALSE;
5409         obj->clearParent();
5410         obj->clearProto();
5411         re = js_NewRegExp(xdr->cx, NULL, source, (uint8)flagsword, JS_FALSE);
5412         if (!re)
5413             return JS_FALSE;
5414         obj->setPrivate(re);
5415         obj->zeroRegExpLastIndex();
5416         *objp = obj;
5417     }
5418     return JS_TRUE;
5419 }
5420
5421 #else  /* !JS_HAS_XDR */
5422
5423 #define js_XDRRegExpObject NULL
5424
5425 #endif /* !JS_HAS_XDR */
5426
5427 static void
5428 regexp_trace(JSTracer *trc, JSObject *obj)
5429 {
5430     JSRegExp *re = (JSRegExp *) obj->getPrivate();
5431     if (re && re->source)
5432         JS_CALL_STRING_TRACER(trc, re->source, "source");
5433 }
5434
5435 JSClass js_RegExpClass = {
5436     js_RegExp_str,
5437     JSCLASS_HAS_PRIVATE |
5438     JSCLASS_HAS_RESERVED_SLOTS(JSObject::REGEXP_FIXED_RESERVED_SLOTS) |
5439     JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
5440     JS_PropertyStub,    JS_PropertyStub,
5441     JS_PropertyStub,    JS_PropertyStub,
5442     JS_EnumerateStub,   JS_ResolveStub,
5443     JS_ConvertStub,     regexp_finalize,
5444     NULL,               NULL,
5445     regexp_call,        NULL,
5446     js_XDRRegExpObject, NULL,
5447     JS_CLASS_TRACE(regexp_trace), 0
5448 };
5449
5450 static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
5451
5452 JSBool
5453 js_regexp_toString(JSContext *cx, JSObject *obj, jsval *vp)
5454 {
5455     JSRegExp *re;
5456     const jschar *source;
5457     jschar *chars;
5458     size_t length, nflags;
5459     uintN flags;
5460     JSString *str;
5461
5462     if (!JS_InstanceOf(cx, obj, &js_RegExpClass, vp + 2))
5463         return JS_FALSE;
5464     JS_LOCK_OBJ(cx, obj);
5465     re = (JSRegExp *) obj->getPrivate();
5466     if (!re) {
5467         JS_UNLOCK_OBJ(cx, obj);
5468         *vp = STRING_TO_JSVAL(cx->runtime->emptyString);
5469         return JS_TRUE;
5470     }
5471
5472     re->source->getCharsAndLength(source, length);
5473     if (length == 0) {
5474         source = empty_regexp_ucstr;
5475         length = JS_ARRAY_LENGTH(empty_regexp_ucstr) - 1;
5476     }
5477     length += 2;
5478     nflags = 0;
5479     for (flags = re->flags; flags != 0; flags &= flags - 1)
5480         nflags++;
5481     chars = (jschar*) cx->malloc((length + nflags + 1) * sizeof(jschar));
5482     if (!chars) {
5483         JS_UNLOCK_OBJ(cx, obj);
5484         return JS_FALSE;
5485     }
5486
5487     chars[0] = '/';
5488     js_strncpy(&chars[1], source, length - 2);
5489     chars[length-1] = '/';
5490     if (nflags) {
5491         if (re->flags & JSREG_GLOB)
5492             chars[length++] = 'g';
5493         if (re->flags & JSREG_FOLD)
5494             chars[length++] = 'i';
5495         if (re->flags & JSREG_MULTILINE)
5496             chars[length++] = 'm';
5497         if (re->flags & JSREG_STICKY)
5498             chars[length++] = 'y';
5499     }
5500     JS_UNLOCK_OBJ(cx, obj);
5501     chars[length] = 0;
5502
5503     str = js_NewString(cx, chars, length);
5504     if (!str) {
5505         cx->free(chars);
5506         return JS_FALSE;
5507     }
5508     *vp = STRING_TO_JSVAL(str);
5509     return JS_TRUE;
5510 }
5511
5512 static JSBool
5513 regexp_toString(JSContext *cx, uintN argc, jsval *vp)
5514 {
5515     JSObject *obj;
5516
5517     obj = JS_THIS_OBJECT(cx, vp);
5518     return obj && js_regexp_toString(cx, obj, vp);
5519 }
5520
5521 static JSBool
5522 regexp_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
5523                    jsval *rval)
5524 {
5525     JSString *opt, *str;
5526     JSRegExp *oldre, *re;
5527     JSObject *obj2;
5528     size_t length, nbytes;
5529     const jschar *cp, *start, *end;
5530     jschar *nstart, *ncp, *tmp;
5531
5532     if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
5533         return JS_FALSE;
5534     opt = NULL;
5535     if (argc == 0) {
5536         str = cx->runtime->emptyString;
5537     } else {
5538         if (JSVAL_IS_OBJECT(argv[0])) {
5539             /*
5540              * If we get passed in a RegExp object we construct a new
5541              * RegExp that is a duplicate of it by re-compiling the
5542              * original source code. ECMA requires that it be an error
5543              * here if the flags are specified. (We must use the flags
5544              * from the original RegExp also).
5545              */
5546             obj2 = JSVAL_TO_OBJECT(argv[0]);
5547             if (obj2 && obj2->getClass() == &js_RegExpClass) {
5548                 if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
5549                     JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
5550                                          JSMSG_NEWREGEXP_FLAGGED);
5551                     return JS_FALSE;
5552                 }
5553                 JS_LOCK_OBJ(cx, obj2);
5554                 re = (JSRegExp *) obj2->getPrivate();
5555                 if (!re) {
5556                     JS_UNLOCK_OBJ(cx, obj2);
5557                     return JS_FALSE;
5558                 }
5559                 re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
5560                 JS_UNLOCK_OBJ(cx, obj2);
5561                 goto created;
5562             }
5563         }
5564         str = js_ValueToString(cx, argv[0]);
5565         if (!str)
5566             return JS_FALSE;
5567         argv[0] = STRING_TO_JSVAL(str);
5568         if (argc > 1) {
5569             if (JSVAL_IS_VOID(argv[1])) {
5570                 opt = NULL;
5571             } else {
5572                 opt = js_ValueToString(cx, argv[1]);
5573                 if (!opt)
5574                     return JS_FALSE;
5575                 argv[1] = STRING_TO_JSVAL(opt);
5576             }
5577         }
5578
5579         /* Escape any naked slashes in the regexp source. */
5580         str->getCharsAndLength(start, length);
5581         end = start + length;
5582         nstart = ncp = NULL;
5583         for (cp = start; cp < end; cp++) {
5584             if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
5585                 nbytes = (++length + 1) * sizeof(jschar);
5586                 if (!nstart) {
5587                     nstart = (jschar *) cx->malloc(nbytes);
5588                     if (!nstart)
5589                         return JS_FALSE;
5590                     ncp = nstart + (cp - start);
5591                     js_strncpy(nstart, start, cp - start);
5592                 } else {
5593                     tmp = (jschar *) cx->realloc(nstart, nbytes);
5594                     if (!tmp) {
5595                         cx->free(nstart);
5596                         return JS_FALSE;
5597                     }
5598                     ncp = tmp + (ncp - nstart);
5599                     nstart = tmp;
5600                 }
5601                 *ncp++ = '\\';
5602             }
5603             if (nstart)
5604                 *ncp++ = *cp;
5605         }
5606
5607         if (nstart) {
5608             /* Don't forget to store the backstop after the new string. */
5609             JS_ASSERT((size_t)(ncp - nstart) == length);
5610             *ncp = 0;
5611             str = js_NewString(cx, nstart, length);
5612             if (!str) {
5613                 cx->free(nstart);
5614                 return JS_FALSE;
5615             }
5616             argv[0] = STRING_TO_JSVAL(str);
5617         }
5618     }
5619
5620     re = js_NewRegExpOpt(cx, str, opt, JS_FALSE);
5621 created:
5622     if (!re)
5623         return JS_FALSE;
5624     JS_LOCK_OBJ(cx, obj);
5625     oldre = (JSRegExp *) obj->getPrivate();
5626     obj->setPrivate(re);
5627     obj->zeroRegExpLastIndex();
5628     JS_UNLOCK_OBJ(cx, obj);
5629     if (oldre)
5630         js_DestroyRegExp(cx, oldre);
5631     *rval = OBJECT_TO_JSVAL(obj);
5632     return JS_TRUE;
5633 }
5634
5635 static JSBool
5636 regexp_compile(JSContext *cx, uintN argc, jsval *vp)
5637 {
5638     JSObject *obj;
5639
5640     obj = JS_THIS_OBJECT(cx, vp);
5641     return obj && regexp_compile_sub(cx, obj, argc, vp + 2, vp);
5642 }
5643
5644 static JSBool
5645 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
5646                 JSBool test, jsval *rval)
5647 {
5648     JSBool ok, sticky;
5649     JSRegExp *re;
5650     jsdouble lastIndex;
5651     JSString *str;
5652     size_t i;
5653
5654     ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
5655     if (!ok)
5656         return JS_FALSE;
5657     JS_LOCK_OBJ(cx, obj);
5658     re = (JSRegExp *) obj->getPrivate();
5659     if (!re) {
5660         JS_UNLOCK_OBJ(cx, obj);
5661         return JS_TRUE;
5662     }
5663
5664     /* NB: we must reach out: after this paragraph, in order to drop re. */
5665     HOLD_REGEXP(cx, re);
5666     sticky = (re->flags & JSREG_STICKY) != 0;
5667     if (re->flags & (JSREG_GLOB | JSREG_STICKY)) {
5668         jsval v = obj->getRegExpLastIndex();
5669         if (JSVAL_IS_INT(v)) {
5670             lastIndex = JSVAL_TO_INT(v);
5671         } else {
5672             JS_ASSERT(JSVAL_IS_DOUBLE(v));
5673             lastIndex = *JSVAL_TO_DOUBLE(v);
5674         }
5675     } else {
5676         lastIndex = 0;
5677     }
5678     JS_UNLOCK_OBJ(cx, obj);
5679
5680     /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
5681     if (argc == 0) {
5682         str = cx->regExpStatics.input;
5683         if (!str) {
5684             const char *bytes = js_GetStringBytes(cx, re->source);
5685
5686             if (bytes) {
5687                 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
5688                                      JSMSG_NO_INPUT,
5689                                      bytes,
5690                                      (re->flags & JSREG_GLOB) ? "g" : "",
5691                                      (re->flags & JSREG_FOLD) ? "i" : "",
5692                                      (re->flags & JSREG_MULTILINE) ? "m" : "",
5693                                      (re->flags & JSREG_STICKY) ? "y" : "");
5694             }
5695             ok = JS_FALSE;
5696             goto out;
5697         }
5698     } else {
5699         str = js_ValueToString(cx, argv[0]);
5700         if (!str) {
5701             ok = JS_FALSE;
5702             goto out;
5703         }
5704         argv[0] = STRING_TO_JSVAL(str);
5705     }
5706
5707     if (lastIndex < 0 || str->length() < lastIndex) {
5708         obj->zeroRegExpLastIndex();
5709         *rval = JSVAL_NULL;
5710     } else {
5711         i = (size_t) lastIndex;
5712         ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
5713         if (ok &&
5714             ((re->flags & JSREG_GLOB) || (*rval != JSVAL_NULL && sticky))) {
5715             if (*rval == JSVAL_NULL)
5716                 obj->zeroRegExpLastIndex();
5717             else
5718                 ok = SetRegExpLastIndex(cx, obj, i);
5719         }
5720     }
5721
5722 out:
5723     DROP_REGEXP(cx, re);
5724     return ok;
5725 }
5726
5727 static JSBool
5728 regexp_exec(JSContext *cx, uintN argc, jsval *vp)
5729 {
5730     return regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_FALSE,
5731                            vp);
5732 }
5733
5734 static JSBool
5735 regexp_test(JSContext *cx, uintN argc, jsval *vp)
5736 {
5737     if (!regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_TRUE, vp))
5738         return JS_FALSE;
5739     if (*vp != JSVAL_TRUE)
5740         *vp = JSVAL_FALSE;
5741     return JS_TRUE;
5742 }
5743
5744 static JSFunctionSpec regexp_methods[] = {
5745 #if JS_HAS_TOSOURCE
5746     JS_FN(js_toSource_str,  regexp_toString,    0,0),
5747 #endif
5748     JS_FN(js_toString_str,  regexp_toString,    0,0),
5749     JS_FN("compile",        regexp_compile,     2,0),
5750     JS_FN("exec",           regexp_exec,        1,0),
5751     JS_FN("test",           regexp_test,        1,0),
5752     JS_FS_END
5753 };
5754
5755 static JSBool
5756 RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
5757 {
5758     if (!JS_IsConstructing(cx)) {
5759         /*
5760          * If first arg is regexp and no flags are given, just return the arg.
5761          * (regexp_compile_sub detects the regexp + flags case and throws a
5762          * TypeError.)  See 10.15.3.1.
5763          */
5764         if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
5765             !JSVAL_IS_PRIMITIVE(argv[0]) &&
5766             JSVAL_TO_OBJECT(argv[0])->getClass() == &js_RegExpClass) {
5767             *rval = argv[0];
5768             return JS_TRUE;
5769         }
5770
5771         /* Otherwise, replace obj with a new RegExp object. */
5772         obj = NewObject(cx, &js_RegExpClass, NULL, NULL);
5773         if (!obj)
5774             return JS_FALSE;
5775
5776         /*
5777          * regexp_compile_sub does not use rval to root its temporaries so we
5778          * can use it to root obj.
5779          */
5780         *rval = OBJECT_TO_JSVAL(obj);
5781     }
5782     return regexp_compile_sub(cx, obj, argc, argv, rval);
5783 }
5784
5785 JSObject *
5786 js_InitRegExpClass(JSContext *cx, JSObject *obj)
5787 {
5788     JSObject *proto = js_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
5789                                    regexp_props, regexp_methods,
5790                                    regexp_static_props, NULL);
5791     if (!proto)
5792         return NULL;
5793
5794     JSObject *ctor = JS_GetConstructor(cx, proto);
5795     if (!ctor)
5796         return NULL;
5797
5798     /* Give RegExp.prototype private data so it matches the empty string. */
5799     jsval rval;
5800     if (!JS_AliasProperty(cx, ctor, "input",        "$_") ||
5801         !JS_AliasProperty(cx, ctor, "multiline",    "$*") ||
5802         !JS_AliasProperty(cx, ctor, "lastMatch",    "$&") ||
5803         !JS_AliasProperty(cx, ctor, "lastParen",    "$+") ||
5804         !JS_AliasProperty(cx, ctor, "leftContext",  "$`") ||
5805         !JS_AliasProperty(cx, ctor, "rightContext", "$'") ||
5806         !regexp_compile_sub(cx, proto, 0, NULL, &rval)) {
5807         return NULL;
5808     }
5809
5810     return proto;
5811 }
5812
5813 JSObject *
5814 js_NewRegExpObject(JSContext *cx, TokenStream *ts,
5815                    const jschar *chars, size_t length, uintN flags)
5816 {
5817     JSString *str;
5818     JSObject *obj;
5819     JSRegExp *re;
5820
5821     str = js_NewStringCopyN(cx, chars, length);
5822     if (!str)
5823         return NULL;
5824     AutoValueRooter tvr(cx, str);
5825     re = js_NewRegExp(cx, ts,  str, flags, JS_FALSE);
5826     if (!re)
5827         return NULL;
5828     obj = NewObject(cx, &js_RegExpClass, NULL, NULL);
5829     if (!obj) {
5830         js_DestroyRegExp(cx, re);
5831         return NULL;
5832     }
5833     obj->setPrivate(re);
5834     obj->zeroRegExpLastIndex();
5835     return obj;
5836 }
5837
5838 JSObject * JS_FASTCALL
5839 js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *proto)
5840 {
5841     JS_ASSERT(obj->getClass() == &js_RegExpClass);
5842     JS_ASSERT(proto);
5843     JS_ASSERT(proto->getClass() == &js_RegExpClass);
5844     JSObject *clone = NewObjectWithGivenProto(cx, &js_RegExpClass, proto, NULL);
5845     if (!clone)
5846         return NULL;
5847     JSRegExp *re = static_cast<JSRegExp *>(obj->getPrivate());
5848     clone->setPrivate(re);
5849     clone->zeroRegExpLastIndex();
5850     HOLD_REGEXP(cx, re);
5851     return clone;
5852 }
5853
5854 #ifdef JS_TRACER
5855 JS_DEFINE_CALLINFO_3(extern, OBJECT, js_CloneRegExpObject, CONTEXT, OBJECT, OBJECT, 0,
5856                      ACC_STORE_ANY)
5857 #endif
5858
5859 bool
5860 js_ContainsRegExpMetaChars(const jschar *chars, size_t length)
5861 {
5862     for (size_t i = 0; i < length; ++i) {
5863         jschar c = chars[i];
5864         switch (c) {
5865           /* Taken from the PatternCharacter production in 15.10.1. */
5866           case '^': case '$': case '\\': case '.': case '*': case '+':
5867           case '?': case '(': case ')': case '[': case ']': case '{':
5868           case '}': case '|':
5869             return true;
5870           default:;
5871         }
5872     }
5873     return false;
5874 }
5875
5876 JSBool
5877 js_ObjectIsRegExp(JSObject *obj)
5878 {
5879     return obj->isRegExp();
5880 }