Bug 462027 - Bail off trace when reentering interpreter. r=gal.
[mozilla-central.git] / js / src / jsregexp.cpp
blob723dd146094beab6e15d8552c44072e63b5e50ea
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
25 * Contributor(s):
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS regular expressions, after Perl.
44 #include "jsstddef.h"
45 #include <stdlib.h>
46 #include <string.h>
47 #include <stdarg.h>
48 #include "jstypes.h"
49 #include "jsarena.h" /* Added by JSIFY */
50 #include "jsutil.h" /* Added by JSIFY */
51 #include "jsapi.h"
52 #include "jsarray.h"
53 #include "jsatom.h"
54 #include "jsbuiltins.h"
55 #include "jscntxt.h"
56 #include "jsversion.h"
57 #include "jsfun.h"
58 #include "jsgc.h"
59 #include "jsinterp.h"
60 #include "jslock.h"
61 #include "jsnum.h"
62 #include "jsobj.h"
63 #include "jsopcode.h"
64 #include "jsregexp.h"
65 #include "jsscan.h"
66 #include "jsscope.h"
67 #include "jsstaticcheck.h"
68 #include "jsstr.h"
70 #ifdef JS_TRACER
71 #include "jstracer.h"
72 using namespace avmplus;
73 using namespace nanojit;
74 #endif
76 typedef enum REOp {
77 #define REOP_DEF(opcode, name) opcode,
78 #include "jsreops.tbl"
79 #undef REOP_DEF
80 REOP_LIMIT /* META: no operator >= to this */
81 } REOp;
83 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
85 #ifdef REGEXP_DEBUG
86 const char *reop_names[] = {
87 #define REOP_DEF(opcode, name) name,
88 #include "jsreops.tbl"
89 #undef REOP_DEF
90 NULL
92 #endif
94 #ifdef __GNUC__
95 static int
96 re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
97 #endif
99 #ifdef REGEXP_DEBUG
100 static int
101 re_debug(const char *fmt, ...)
103 va_list ap;
104 int retval;
106 va_start(ap, fmt);
107 retval = vprintf(fmt, ap);
108 va_end(ap);
109 return retval;
112 static void
113 re_debug_chars(const jschar *chrs, size_t length)
115 int i = 0;
117 printf(" \"");
118 while (*chrs && i++ < length) {
119 putchar((char)*chrs++);
121 printf("\"");
123 #else /* !REGEXP_DEBUG */
124 /* This should be optimized to a no-op by our tier-1 compilers. */
125 static int
126 re_debug(const char *fmt, ...)
128 return 0;
131 static void
132 re_debug_chars(const jschar *chrs, size_t length)
135 #endif /* !REGEXP_DEBUG */
137 struct RENode {
138 REOp op; /* r.e. op bytecode */
139 RENode *next; /* next in concatenation order */
140 void *kid; /* first operand */
141 union {
142 void *kid2; /* second operand */
143 jsint num; /* could be a number */
144 size_t parenIndex; /* or a parenthesis index */
145 struct { /* or a quantifier range */
146 uintN min;
147 uintN max;
148 JSPackedBool greedy;
149 } range;
150 struct { /* or a character class */
151 size_t startIndex;
152 size_t kidlen; /* length of string at kid, in jschars */
153 size_t index; /* index into class list */
154 uint16 bmsize; /* bitmap size, based on max char code */
155 JSPackedBool sense;
156 } ucclass;
157 struct { /* or a literal sequence */
158 jschar chr; /* of one character */
159 size_t length; /* or many (via the kid) */
160 } flat;
161 struct {
162 RENode *kid2; /* second operand from ALT */
163 jschar ch1; /* match char for ALTPREREQ */
164 jschar ch2; /* ditto, or class index for ALTPREREQ2 */
165 } altprereq;
166 } u;
169 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
170 ((c >= 'a') && (c <= 'z')) )
171 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
172 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
174 #define CLASS_CACHE_SIZE 4
176 typedef struct CompilerState {
177 JSContext *context;
178 JSTokenStream *tokenStream; /* For reporting errors */
179 const jschar *cpbegin;
180 const jschar *cpend;
181 const jschar *cp;
182 size_t parenCount;
183 size_t classCount; /* number of [] encountered */
184 size_t treeDepth; /* maximum depth of parse tree */
185 size_t progLength; /* estimated bytecode length */
186 RENode *result;
187 size_t classBitmapsMem; /* memory to hold all class bitmaps */
188 struct {
189 const jschar *start; /* small cache of class strings */
190 size_t length; /* since they're often the same */
191 size_t index;
192 } classCache[CLASS_CACHE_SIZE];
193 uint16 flags;
194 } CompilerState;
196 typedef struct EmitStateStackEntry {
197 jsbytecode *altHead; /* start of REOP_ALT* opcode */
198 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
199 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
200 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
201 RENode *continueNode; /* original REOP_ALT* node being stacked */
202 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
203 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
204 avoid 16-bit unsigned offset overflow */
205 } EmitStateStackEntry;
208 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
209 * the getters and setters take the pc of the offset, not of the opcode before
210 * the offset.
212 #define ARG_LEN 2
213 #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
214 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
215 (pc)[1] = (jsbytecode) (arg))
217 #define OFFSET_LEN ARG_LEN
218 #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
219 #define GET_OFFSET(pc) GET_ARG(pc)
222 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
223 * For sanity, we limit it to 2^24 bytes.
225 #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
228 * The maximum memory that can be allocated for class bitmaps.
229 * For sanity, we limit it to 2^24 bytes.
231 #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
234 * Functions to get size and write/read bytecode that represent small indexes
235 * compactly.
236 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
237 * indicates that the following byte brings more bits to the index. Otherwise
238 * this is the last byte in the index bytecode representing highest index bits.
240 static size_t
241 GetCompactIndexWidth(size_t index)
243 size_t width;
245 for (width = 1; (index >>= 7) != 0; ++width) { }
246 return width;
249 static JS_ALWAYS_INLINE jsbytecode *
250 WriteCompactIndex(jsbytecode *pc, size_t index)
252 size_t next;
254 while ((next = index >> 7) != 0) {
255 *pc++ = (jsbytecode)(index | 0x80);
256 index = next;
258 *pc++ = (jsbytecode)index;
259 return pc;
262 static JS_ALWAYS_INLINE jsbytecode *
263 ReadCompactIndex(jsbytecode *pc, size_t *result)
265 size_t nextByte;
267 nextByte = *pc++;
268 if ((nextByte & 0x80) == 0) {
270 * Short-circuit the most common case when compact index <= 127.
272 *result = nextByte;
273 } else {
274 size_t shift = 7;
275 *result = 0x7F & nextByte;
276 do {
277 nextByte = *pc++;
278 *result |= (nextByte & 0x7F) << shift;
279 shift += 7;
280 } while ((nextByte & 0x80) != 0);
282 return pc;
285 typedef struct RECapture {
286 ptrdiff_t index; /* start of contents, -1 for empty */
287 size_t length; /* length of capture */
288 } RECapture;
290 typedef struct REMatchState {
291 const jschar *cp;
292 RECapture parens[1]; /* first of 're->parenCount' captures,
293 allocated at end of this struct */
294 } REMatchState;
296 struct REBackTrackData;
298 typedef struct REProgState {
299 jsbytecode *continue_pc; /* current continuation data */
300 jsbytecode continue_op;
301 ptrdiff_t index; /* progress in text */
302 size_t parenSoFar; /* highest indexed paren started */
303 union {
304 struct {
305 uintN min; /* current quantifier limits */
306 uintN max;
307 } quantifier;
308 struct {
309 size_t top; /* backtrack stack state */
310 size_t sz;
311 } assertion;
312 } u;
313 } REProgState;
315 typedef struct REBackTrackData {
316 size_t sz; /* size of previous stack entry */
317 jsbytecode *backtrack_pc; /* where to backtrack to */
318 jsbytecode backtrack_op;
319 const jschar *cp; /* index in text of match at backtrack */
320 size_t parenIndex; /* start index of saved paren contents */
321 size_t parenCount; /* # of saved paren contents */
322 size_t saveStateStackTop; /* number of parent states */
323 /* saved parent states follow */
324 /* saved paren contents follow */
325 } REBackTrackData;
327 #define INITIAL_STATESTACK 100
328 #define INITIAL_BACKTRACK 8000
330 typedef struct REGlobalData {
331 JSContext *cx;
332 JSRegExp *regexp; /* the RE in execution */
333 JSBool ok; /* runtime error (out_of_memory only?) */
334 size_t start; /* offset to start at */
335 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
336 const jschar *cpbegin; /* text base address */
337 const jschar *cpend; /* text limit address */
339 REProgState *stateStack; /* stack of state of current parents */
340 size_t stateStackTop;
341 size_t stateStackLimit;
343 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
344 REBackTrackData *backTrackSP;
345 size_t backTrackStackSize;
346 size_t cursz; /* size of current stack entry */
347 size_t backTrackCount; /* how many times we've backtracked */
348 size_t backTrackLimit; /* upper limit on backtrack states */
349 } REGlobalData;
352 * 1. If IgnoreCase is false, return ch.
353 * 2. Let u be ch converted to upper case as if by calling
354 * String.prototype.toUpperCase on the one-character string ch.
355 * 3. If u does not consist of a single character, return ch.
356 * 4. Let cu be u's character.
357 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
358 * code point value is less than decimal 128, then return ch.
359 * 6. Return cu.
361 static JS_ALWAYS_INLINE uintN
362 upcase(uintN ch)
364 uintN cu;
366 JS_ASSERT((uintN) (jschar) ch == ch);
367 if (ch < 128) {
368 if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
369 ch -= (uintN) ('a' - 'A');
370 return ch;
373 cu = JS_TOUPPER(ch);
374 return (cu < 128) ? ch : cu;
377 static JS_ALWAYS_INLINE uintN
378 downcase(uintN ch)
380 JS_ASSERT((uintN) (jschar) ch == ch);
381 if (ch < 128) {
382 if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A'))
383 ch += (uintN) ('a' - 'A');
384 return ch;
387 return JS_TOLOWER(ch);
390 /* Construct and initialize an RENode, returning NULL for out-of-memory */
391 static RENode *
392 NewRENode(CompilerState *state, REOp op)
394 JSContext *cx;
395 RENode *ren;
397 cx = state->context;
398 JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
399 if (!ren) {
400 js_ReportOutOfScriptQuota(cx);
401 return NULL;
403 ren->op = op;
404 ren->next = NULL;
405 ren->kid = NULL;
406 return ren;
410 * Validates and converts hex ascii value.
412 static JSBool
413 isASCIIHexDigit(jschar c, uintN *digit)
415 uintN cv = c;
417 if (cv < '0')
418 return JS_FALSE;
419 if (cv <= '9') {
420 *digit = cv - '0';
421 return JS_TRUE;
423 cv |= 0x20;
424 if (cv >= 'a' && cv <= 'f') {
425 *digit = cv - 'a' + 10;
426 return JS_TRUE;
428 return JS_FALSE;
432 typedef struct {
433 REOp op;
434 const jschar *errPos;
435 size_t parenIndex;
436 } REOpData;
438 static JSBool
439 ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
440 const jschar *arg)
442 if (state->tokenStream) {
443 return js_ReportCompileErrorNumber(state->context, state->tokenStream,
444 NULL, JSREPORT_UC | flags,
445 errorNumber, arg);
447 return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
448 js_GetErrorMessage, NULL,
449 errorNumber, arg);
452 static JSBool
453 ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
455 return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
459 * Process the op against the two top operands, reducing them to a single
460 * operand in the penultimate slot. Update progLength and treeDepth.
462 static JSBool
463 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
464 intN operandSP)
466 RENode *result;
468 switch (opData->op) {
469 case REOP_ALT:
470 result = NewRENode(state, REOP_ALT);
471 if (!result)
472 return JS_FALSE;
473 result->kid = operandStack[operandSP - 2];
474 result->u.kid2 = operandStack[operandSP - 1];
475 operandStack[operandSP - 2] = result;
477 if (state->treeDepth == TREE_DEPTH_MAX) {
478 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
479 return JS_FALSE;
481 ++state->treeDepth;
484 * Look at both alternates to see if there's a FLAT or a CLASS at
485 * the start of each. If so, use a prerequisite match.
487 if (((RENode *) result->kid)->op == REOP_FLAT &&
488 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
489 (state->flags & JSREG_FOLD) == 0) {
490 result->op = REOP_ALTPREREQ;
491 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
492 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
493 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
494 JUMP, <end> ... ENDALT */
495 state->progLength += 13;
497 else
498 if (((RENode *) result->kid)->op == REOP_CLASS &&
499 ((RENode *) result->kid)->u.ucclass.index < 256 &&
500 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
501 (state->flags & JSREG_FOLD) == 0) {
502 result->op = REOP_ALTPREREQ2;
503 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
504 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
505 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
506 JUMP, <end> ... ENDALT */
507 state->progLength += 13;
509 else
510 if (((RENode *) result->kid)->op == REOP_FLAT &&
511 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
512 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
513 (state->flags & JSREG_FOLD) == 0) {
514 result->op = REOP_ALTPREREQ2;
515 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
516 result->u.altprereq.ch2 =
517 ((RENode *) result->u.kid2)->u.ucclass.index;
518 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
519 JUMP, <end> ... ENDALT */
520 state->progLength += 13;
522 else {
523 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
524 state->progLength += 7;
526 break;
528 case REOP_CONCAT:
529 result = operandStack[operandSP - 2];
530 while (result->next)
531 result = result->next;
532 result->next = operandStack[operandSP - 1];
533 break;
535 case REOP_ASSERT:
536 case REOP_ASSERT_NOT:
537 case REOP_LPARENNON:
538 case REOP_LPAREN:
539 /* These should have been processed by a close paren. */
540 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
541 opData->errPos);
542 return JS_FALSE;
544 default:;
546 return JS_TRUE;
550 * Parser forward declarations.
552 static JSBool ParseTerm(CompilerState *state);
553 static JSBool ParseQuantifier(CompilerState *state);
554 static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
557 * Top-down regular expression grammar, based closely on Perl4.
559 * regexp: altern A regular expression is one or more
560 * altern '|' regexp alternatives separated by vertical bar.
562 #define INITIAL_STACK_SIZE 128
564 static JSBool
565 ParseRegExp(CompilerState *state)
567 size_t parenIndex;
568 RENode *operand;
569 REOpData *operatorStack;
570 RENode **operandStack;
571 REOp op;
572 intN i;
573 JSBool result = JS_FALSE;
575 intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
576 intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
578 /* Watch out for empty regexp */
579 if (state->cp == state->cpend) {
580 state->result = NewRENode(state, REOP_EMPTY);
581 return (state->result != NULL);
584 operatorStack = (REOpData *)
585 JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
586 if (!operatorStack)
587 return JS_FALSE;
589 operandStack = (RENode **)
590 JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
591 if (!operandStack)
592 goto out;
594 for (;;) {
595 parenIndex = state->parenCount;
596 if (state->cp == state->cpend) {
598 * If we are at the end of the regexp and we're short one or more
599 * operands, the regexp must have the form /x|/ or some such, with
600 * left parentheses making us short more than one operand.
602 if (operatorSP >= operandSP) {
603 operand = NewRENode(state, REOP_EMPTY);
604 if (!operand)
605 goto out;
606 goto pushOperand;
608 } else {
609 switch (*state->cp) {
610 case '(':
611 ++state->cp;
612 if (state->cp + 1 < state->cpend &&
613 *state->cp == '?' &&
614 (state->cp[1] == '=' ||
615 state->cp[1] == '!' ||
616 state->cp[1] == ':')) {
617 switch (state->cp[1]) {
618 case '=':
619 op = REOP_ASSERT;
620 /* ASSERT, <next>, ... ASSERTTEST */
621 state->progLength += 4;
622 break;
623 case '!':
624 op = REOP_ASSERT_NOT;
625 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
626 state->progLength += 4;
627 break;
628 default:
629 op = REOP_LPARENNON;
630 break;
632 state->cp += 2;
633 } else {
634 op = REOP_LPAREN;
635 /* LPAREN, <index>, ... RPAREN, <index> */
636 state->progLength
637 += 2 * (1 + GetCompactIndexWidth(parenIndex));
638 state->parenCount++;
639 if (state->parenCount == 65535) {
640 ReportRegExpError(state, JSREPORT_ERROR,
641 JSMSG_TOO_MANY_PARENS);
642 goto out;
645 goto pushOperator;
647 case ')':
649 * If there's no stacked open parenthesis, throw syntax error.
651 for (i = operatorSP - 1; ; i--) {
652 if (i < 0) {
653 ReportRegExpError(state, JSREPORT_ERROR,
654 JSMSG_UNMATCHED_RIGHT_PAREN);
655 goto out;
657 if (operatorStack[i].op == REOP_ASSERT ||
658 operatorStack[i].op == REOP_ASSERT_NOT ||
659 operatorStack[i].op == REOP_LPARENNON ||
660 operatorStack[i].op == REOP_LPAREN) {
661 break;
664 /* FALL THROUGH */
666 case '|':
667 /* Expected an operand before these, so make an empty one */
668 operand = NewRENode(state, REOP_EMPTY);
669 if (!operand)
670 goto out;
671 goto pushOperand;
673 default:
674 if (!ParseTerm(state))
675 goto out;
676 operand = state->result;
677 pushOperand:
678 if (operandSP == operandStackSize) {
679 RENode **tmp;
680 operandStackSize += operandStackSize;
681 tmp = (RENode **)
682 JS_realloc(state->context, operandStack,
683 sizeof(RENode *) * operandStackSize);
684 if (!tmp)
685 goto out;
686 operandStack = tmp;
688 operandStack[operandSP++] = operand;
689 break;
693 /* At the end; process remaining operators. */
694 restartOperator:
695 if (state->cp == state->cpend) {
696 while (operatorSP) {
697 --operatorSP;
698 if (!ProcessOp(state, &operatorStack[operatorSP],
699 operandStack, operandSP))
700 goto out;
701 --operandSP;
703 JS_ASSERT(operandSP == 1);
704 state->result = operandStack[0];
705 result = JS_TRUE;
706 goto out;
709 switch (*state->cp) {
710 case '|':
711 /* Process any stacked 'concat' operators */
712 ++state->cp;
713 while (operatorSP &&
714 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
715 --operatorSP;
716 if (!ProcessOp(state, &operatorStack[operatorSP],
717 operandStack, operandSP)) {
718 goto out;
720 --operandSP;
722 op = REOP_ALT;
723 goto pushOperator;
725 case ')':
727 * If there's no stacked open parenthesis, throw syntax error.
729 for (i = operatorSP - 1; ; i--) {
730 if (i < 0) {
731 ReportRegExpError(state, JSREPORT_ERROR,
732 JSMSG_UNMATCHED_RIGHT_PAREN);
733 goto out;
735 if (operatorStack[i].op == REOP_ASSERT ||
736 operatorStack[i].op == REOP_ASSERT_NOT ||
737 operatorStack[i].op == REOP_LPARENNON ||
738 operatorStack[i].op == REOP_LPAREN) {
739 break;
742 ++state->cp;
744 /* Process everything on the stack until the open parenthesis. */
745 for (;;) {
746 JS_ASSERT(operatorSP);
747 --operatorSP;
748 switch (operatorStack[operatorSP].op) {
749 case REOP_ASSERT:
750 case REOP_ASSERT_NOT:
751 case REOP_LPAREN:
752 operand = NewRENode(state, operatorStack[operatorSP].op);
753 if (!operand)
754 goto out;
755 operand->u.parenIndex =
756 operatorStack[operatorSP].parenIndex;
757 JS_ASSERT(operandSP);
758 operand->kid = operandStack[operandSP - 1];
759 operandStack[operandSP - 1] = operand;
760 if (state->treeDepth == TREE_DEPTH_MAX) {
761 ReportRegExpError(state, JSREPORT_ERROR,
762 JSMSG_REGEXP_TOO_COMPLEX);
763 goto out;
765 ++state->treeDepth;
766 /* FALL THROUGH */
768 case REOP_LPARENNON:
769 state->result = operandStack[operandSP - 1];
770 if (!ParseQuantifier(state))
771 goto out;
772 operandStack[operandSP - 1] = state->result;
773 goto restartOperator;
774 default:
775 if (!ProcessOp(state, &operatorStack[operatorSP],
776 operandStack, operandSP))
777 goto out;
778 --operandSP;
779 break;
782 break;
784 case '{':
786 const jschar *errp = state->cp;
788 if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
790 * This didn't even scan correctly as a quantifier, so we should
791 * treat it as flat.
793 op = REOP_CONCAT;
794 goto pushOperator;
797 state->cp = errp;
798 /* FALL THROUGH */
801 case '+':
802 case '*':
803 case '?':
804 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
805 state->cp);
806 result = JS_FALSE;
807 goto out;
809 default:
810 /* Anything else is the start of the next term. */
811 op = REOP_CONCAT;
812 pushOperator:
813 if (operatorSP == operatorStackSize) {
814 REOpData *tmp;
815 operatorStackSize += operatorStackSize;
816 tmp = (REOpData *)
817 JS_realloc(state->context, operatorStack,
818 sizeof(REOpData) * operatorStackSize);
819 if (!tmp)
820 goto out;
821 operatorStack = tmp;
823 operatorStack[operatorSP].op = op;
824 operatorStack[operatorSP].errPos = state->cp;
825 operatorStack[operatorSP++].parenIndex = parenIndex;
826 break;
829 out:
830 if (operatorStack)
831 JS_free(state->context, operatorStack);
832 if (operandStack)
833 JS_free(state->context, operandStack);
834 return result;
838 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
839 * its being on the stack, and to propagate errors to its callers.
841 #define JSREG_FIND_PAREN_COUNT 0x8000
842 #define JSREG_FIND_PAREN_ERROR 0x4000
845 * Magic return value from FindParenCount and GetDecimalValue, to indicate
846 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
847 * its findMax parameter is non-null.
849 #define OVERFLOW_VALUE ((uintN)-1)
851 static uintN
852 FindParenCount(CompilerState *state)
854 CompilerState temp;
855 int i;
857 if (state->flags & JSREG_FIND_PAREN_COUNT)
858 return OVERFLOW_VALUE;
861 * Copy state into temp, flag it so we never report an invalid backref,
862 * and reset its members to parse the entire regexp. This is obviously
863 * suboptimal, but GetDecimalValue calls us only if a backref appears to
864 * refer to a forward parenthetical, which is rare.
866 temp = *state;
867 temp.flags |= JSREG_FIND_PAREN_COUNT;
868 temp.cp = temp.cpbegin;
869 temp.parenCount = 0;
870 temp.classCount = 0;
871 temp.progLength = 0;
872 temp.treeDepth = 0;
873 temp.classBitmapsMem = 0;
874 for (i = 0; i < CLASS_CACHE_SIZE; i++)
875 temp.classCache[i].start = NULL;
877 if (!ParseRegExp(&temp)) {
878 state->flags |= JSREG_FIND_PAREN_ERROR;
879 return OVERFLOW_VALUE;
881 return temp.parenCount;
885 * Extract and return a decimal value at state->cp. The initial character c
886 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
887 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
888 * state->flags to discover whether an error occurred under findMax.
890 static uintN
891 GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
892 CompilerState *state)
894 uintN value = JS7_UNDEC(c);
895 JSBool overflow = (value > max && (!findMax || value > findMax(state)));
897 /* The following restriction allows simpler overflow checks. */
898 JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
899 while (state->cp < state->cpend) {
900 c = *state->cp;
901 if (!JS7_ISDEC(c))
902 break;
903 value = 10 * value + JS7_UNDEC(c);
904 if (!overflow && value > max && (!findMax || value > findMax(state)))
905 overflow = JS_TRUE;
906 ++state->cp;
908 return overflow ? OVERFLOW_VALUE : value;
912 * Calculate the total size of the bitmap required for a class expression.
914 static JSBool
915 CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
916 const jschar *end)
918 uintN max = 0;
919 JSBool inRange = JS_FALSE;
920 jschar c, rangeStart = 0;
921 uintN n, digit, nDigits, i;
923 target->u.ucclass.bmsize = 0;
924 target->u.ucclass.sense = JS_TRUE;
926 if (src == end)
927 return JS_TRUE;
929 if (*src == '^') {
930 ++src;
931 target->u.ucclass.sense = JS_FALSE;
934 while (src != end) {
935 JSBool canStartRange = JS_TRUE;
936 uintN localMax = 0;
938 switch (*src) {
939 case '\\':
940 ++src;
941 c = *src++;
942 switch (c) {
943 case 'b':
944 localMax = 0x8;
945 break;
946 case 'f':
947 localMax = 0xC;
948 break;
949 case 'n':
950 localMax = 0xA;
951 break;
952 case 'r':
953 localMax = 0xD;
954 break;
955 case 't':
956 localMax = 0x9;
957 break;
958 case 'v':
959 localMax = 0xB;
960 break;
961 case 'c':
962 if (src < end && RE_IS_LETTER(*src)) {
963 localMax = (uintN) (*src++) & 0x1F;
964 } else {
965 --src;
966 localMax = '\\';
968 break;
969 case 'x':
970 nDigits = 2;
971 goto lexHex;
972 case 'u':
973 nDigits = 4;
974 lexHex:
975 n = 0;
976 for (i = 0; (i < nDigits) && (src < end); i++) {
977 c = *src++;
978 if (!isASCIIHexDigit(c, &digit)) {
980 * Back off to accepting the original
981 *'\' as a literal.
983 src -= i + 1;
984 n = '\\';
985 break;
987 n = (n << 4) | digit;
989 localMax = n;
990 break;
991 case 'd':
992 canStartRange = JS_FALSE;
993 if (inRange) {
994 JS_ReportErrorNumber(state->context,
995 js_GetErrorMessage, NULL,
996 JSMSG_BAD_CLASS_RANGE);
997 return JS_FALSE;
999 localMax = '9';
1000 break;
1001 case 'D':
1002 case 's':
1003 case 'S':
1004 case 'w':
1005 case 'W':
1006 canStartRange = JS_FALSE;
1007 if (inRange) {
1008 JS_ReportErrorNumber(state->context,
1009 js_GetErrorMessage, NULL,
1010 JSMSG_BAD_CLASS_RANGE);
1011 return JS_FALSE;
1013 max = 65535;
1016 * If this is the start of a range, ensure that it's less than
1017 * the end.
1019 localMax = 0;
1020 break;
1021 case '0':
1022 case '1':
1023 case '2':
1024 case '3':
1025 case '4':
1026 case '5':
1027 case '6':
1028 case '7':
1030 * This is a non-ECMA extension - decimal escapes (in this
1031 * case, octal!) are supposed to be an error inside class
1032 * ranges, but supported here for backwards compatibility.
1035 n = JS7_UNDEC(c);
1036 c = *src;
1037 if ('0' <= c && c <= '7') {
1038 src++;
1039 n = 8 * n + JS7_UNDEC(c);
1040 c = *src;
1041 if ('0' <= c && c <= '7') {
1042 src++;
1043 i = 8 * n + JS7_UNDEC(c);
1044 if (i <= 0377)
1045 n = i;
1046 else
1047 src--;
1050 localMax = n;
1051 break;
1053 default:
1054 localMax = c;
1055 break;
1057 break;
1058 default:
1059 localMax = *src++;
1060 break;
1063 if (inRange) {
1064 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1065 if (rangeStart > localMax) {
1066 JS_ReportErrorNumber(state->context,
1067 js_GetErrorMessage, NULL,
1068 JSMSG_BAD_CLASS_RANGE);
1069 return JS_FALSE;
1071 inRange = JS_FALSE;
1072 } else {
1073 if (canStartRange && src < end - 1) {
1074 if (*src == '-') {
1075 ++src;
1076 inRange = JS_TRUE;
1077 rangeStart = (jschar)localMax;
1078 continue;
1081 if (state->flags & JSREG_FOLD)
1082 rangeStart = localMax; /* one run of the uc/dc loop below */
1085 if (state->flags & JSREG_FOLD) {
1086 jschar maxch = localMax;
1088 for (i = rangeStart; i <= localMax; i++) {
1089 jschar uch, dch;
1091 uch = upcase(i);
1092 dch = downcase(i);
1093 maxch = JS_MAX(maxch, uch);
1094 maxch = JS_MAX(maxch, dch);
1096 localMax = maxch;
1099 if (localMax > max)
1100 max = localMax;
1102 target->u.ucclass.bmsize = max;
1103 return JS_TRUE;
1107 * item: assertion An item is either an assertion or
1108 * quantatom a quantified atom.
1110 * assertion: '^' Assertions match beginning of string
1111 * (or line if the class static property
1112 * RegExp.multiline is true).
1113 * '$' End of string (or line if the class
1114 * static property RegExp.multiline is
1115 * true).
1116 * '\b' Word boundary (between \w and \W).
1117 * '\B' Word non-boundary.
1119 * quantatom: atom An unquantified atom.
1120 * quantatom '{' n ',' m '}'
1121 * Atom must occur between n and m times.
1122 * quantatom '{' n ',' '}' Atom must occur at least n times.
1123 * quantatom '{' n '}' Atom must occur exactly n times.
1124 * quantatom '*' Zero or more times (same as {0,}).
1125 * quantatom '+' One or more times (same as {1,}).
1126 * quantatom '?' Zero or one time (same as {0,1}).
1128 * any of which can be optionally followed by '?' for ungreedy
1130 * atom: '(' regexp ')' A parenthesized regexp (what matched
1131 * can be addressed using a backreference,
1132 * see '\' n below).
1133 * '.' Matches any char except '\n'.
1134 * '[' classlist ']' A character class.
1135 * '[' '^' classlist ']' A negated character class.
1136 * '\f' Form Feed.
1137 * '\n' Newline (Line Feed).
1138 * '\r' Carriage Return.
1139 * '\t' Horizontal Tab.
1140 * '\v' Vertical Tab.
1141 * '\d' A digit (same as [0-9]).
1142 * '\D' A non-digit.
1143 * '\w' A word character, [0-9a-z_A-Z].
1144 * '\W' A non-word character.
1145 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1146 * '\S' A non-whitespace character.
1147 * '\' n A backreference to the nth (n decimal
1148 * and positive) parenthesized expression.
1149 * '\' octal An octal escape sequence (octal must be
1150 * two or three digits long, unless it is
1151 * 0 for the null character).
1152 * '\x' hex A hex escape (hex must be two digits).
1153 * '\u' unicode A unicode escape (must be four digits).
1154 * '\c' ctrl A control character, ctrl is a letter.
1155 * '\' literalatomchar Any character except one of the above
1156 * that follow '\' in an atom.
1157 * otheratomchar Any character not first among the other
1158 * atom right-hand sides.
1160 static JSBool
1161 ParseTerm(CompilerState *state)
1163 jschar c = *state->cp++;
1164 uintN nDigits;
1165 uintN num, tmp, n, i;
1166 const jschar *termStart;
1168 switch (c) {
1169 /* assertions and atoms */
1170 case '^':
1171 state->result = NewRENode(state, REOP_BOL);
1172 if (!state->result)
1173 return JS_FALSE;
1174 state->progLength++;
1175 return JS_TRUE;
1176 case '$':
1177 state->result = NewRENode(state, REOP_EOL);
1178 if (!state->result)
1179 return JS_FALSE;
1180 state->progLength++;
1181 return JS_TRUE;
1182 case '\\':
1183 if (state->cp >= state->cpend) {
1184 /* a trailing '\' is an error */
1185 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1186 return JS_FALSE;
1188 c = *state->cp++;
1189 switch (c) {
1190 /* assertion escapes */
1191 case 'b' :
1192 state->result = NewRENode(state, REOP_WBDRY);
1193 if (!state->result)
1194 return JS_FALSE;
1195 state->progLength++;
1196 return JS_TRUE;
1197 case 'B':
1198 state->result = NewRENode(state, REOP_WNONBDRY);
1199 if (!state->result)
1200 return JS_FALSE;
1201 state->progLength++;
1202 return JS_TRUE;
1203 /* Decimal escape */
1204 case '0':
1205 /* Give a strict warning. See also the note below. */
1206 if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
1207 JSMSG_INVALID_BACKREF)) {
1208 return JS_FALSE;
1210 doOctal:
1211 num = 0;
1212 while (state->cp < state->cpend) {
1213 c = *state->cp;
1214 if (c < '0' || '7' < c)
1215 break;
1216 state->cp++;
1217 tmp = 8 * num + (uintN)JS7_UNDEC(c);
1218 if (tmp > 0377)
1219 break;
1220 num = tmp;
1222 c = (jschar)num;
1223 doFlat:
1224 state->result = NewRENode(state, REOP_FLAT);
1225 if (!state->result)
1226 return JS_FALSE;
1227 state->result->u.flat.chr = c;
1228 state->result->u.flat.length = 1;
1229 state->progLength += 3;
1230 break;
1231 case '1':
1232 case '2':
1233 case '3':
1234 case '4':
1235 case '5':
1236 case '6':
1237 case '7':
1238 case '8':
1239 case '9':
1240 termStart = state->cp - 1;
1241 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1242 if (state->flags & JSREG_FIND_PAREN_ERROR)
1243 return JS_FALSE;
1244 if (num == OVERFLOW_VALUE) {
1245 /* Give a strict mode warning. */
1246 if (!ReportRegExpError(state,
1247 JSREPORT_WARNING | JSREPORT_STRICT,
1248 (c >= '8')
1249 ? JSMSG_INVALID_BACKREF
1250 : JSMSG_BAD_BACKREF)) {
1251 return JS_FALSE;
1255 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1256 * error here. However, for compatibility with IE, we treat the
1257 * whole backref as flat if the first character in it is not a
1258 * valid octal character, and as an octal escape otherwise.
1260 state->cp = termStart;
1261 if (c >= '8') {
1262 /* Treat this as flat. termStart - 1 is the \. */
1263 c = '\\';
1264 goto asFlat;
1267 /* Treat this as an octal escape. */
1268 goto doOctal;
1270 JS_ASSERT(1 <= num && num <= 0x10000);
1271 state->result = NewRENode(state, REOP_BACKREF);
1272 if (!state->result)
1273 return JS_FALSE;
1274 state->result->u.parenIndex = num - 1;
1275 state->progLength
1276 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1277 break;
1278 /* Control escape */
1279 case 'f':
1280 c = 0xC;
1281 goto doFlat;
1282 case 'n':
1283 c = 0xA;
1284 goto doFlat;
1285 case 'r':
1286 c = 0xD;
1287 goto doFlat;
1288 case 't':
1289 c = 0x9;
1290 goto doFlat;
1291 case 'v':
1292 c = 0xB;
1293 goto doFlat;
1294 /* Control letter */
1295 case 'c':
1296 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1297 c = (jschar) (*state->cp++ & 0x1F);
1298 } else {
1299 /* back off to accepting the original '\' as a literal */
1300 --state->cp;
1301 c = '\\';
1303 goto doFlat;
1304 /* HexEscapeSequence */
1305 case 'x':
1306 nDigits = 2;
1307 goto lexHex;
1308 /* UnicodeEscapeSequence */
1309 case 'u':
1310 nDigits = 4;
1311 lexHex:
1312 n = 0;
1313 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1314 uintN digit;
1315 c = *state->cp++;
1316 if (!isASCIIHexDigit(c, &digit)) {
1318 * Back off to accepting the original 'u' or 'x' as a
1319 * literal.
1321 state->cp -= i + 2;
1322 n = *state->cp++;
1323 break;
1325 n = (n << 4) | digit;
1327 c = (jschar) n;
1328 goto doFlat;
1329 /* Character class escapes */
1330 case 'd':
1331 state->result = NewRENode(state, REOP_DIGIT);
1332 doSimple:
1333 if (!state->result)
1334 return JS_FALSE;
1335 state->progLength++;
1336 break;
1337 case 'D':
1338 state->result = NewRENode(state, REOP_NONDIGIT);
1339 goto doSimple;
1340 case 's':
1341 state->result = NewRENode(state, REOP_SPACE);
1342 goto doSimple;
1343 case 'S':
1344 state->result = NewRENode(state, REOP_NONSPACE);
1345 goto doSimple;
1346 case 'w':
1347 state->result = NewRENode(state, REOP_ALNUM);
1348 goto doSimple;
1349 case 'W':
1350 state->result = NewRENode(state, REOP_NONALNUM);
1351 goto doSimple;
1352 /* IdentityEscape */
1353 default:
1354 state->result = NewRENode(state, REOP_FLAT);
1355 if (!state->result)
1356 return JS_FALSE;
1357 state->result->u.flat.chr = c;
1358 state->result->u.flat.length = 1;
1359 state->result->kid = (void *) (state->cp - 1);
1360 state->progLength += 3;
1361 break;
1363 break;
1364 case '[':
1365 state->result = NewRENode(state, REOP_CLASS);
1366 if (!state->result)
1367 return JS_FALSE;
1368 termStart = state->cp;
1369 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1370 for (;;) {
1371 if (state->cp == state->cpend) {
1372 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1373 JSMSG_UNTERM_CLASS, termStart);
1375 return JS_FALSE;
1377 if (*state->cp == '\\') {
1378 state->cp++;
1379 if (state->cp != state->cpend)
1380 state->cp++;
1381 continue;
1383 if (*state->cp == ']') {
1384 state->result->u.ucclass.kidlen = state->cp - termStart;
1385 break;
1387 state->cp++;
1389 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1390 if (!state->classCache[i].start) {
1391 state->classCache[i].start = termStart;
1392 state->classCache[i].length = state->result->u.ucclass.kidlen;
1393 state->classCache[i].index = state->classCount;
1394 break;
1396 if (state->classCache[i].length ==
1397 state->result->u.ucclass.kidlen) {
1398 for (n = 0; ; n++) {
1399 if (n == state->classCache[i].length) {
1400 state->result->u.ucclass.index
1401 = state->classCache[i].index;
1402 goto claim;
1404 if (state->classCache[i].start[n] != termStart[n])
1405 break;
1409 state->result->u.ucclass.index = state->classCount++;
1411 claim:
1413 * Call CalculateBitmapSize now as we want any errors it finds
1414 * to be reported during the parse phase, not at execution.
1416 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1417 return JS_FALSE;
1419 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1420 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1421 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1423 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1424 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1425 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1426 return JS_FALSE;
1428 state->classBitmapsMem += n;
1429 /* CLASS, <index> */
1430 state->progLength
1431 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1432 break;
1434 case '.':
1435 state->result = NewRENode(state, REOP_DOT);
1436 goto doSimple;
1438 case '{':
1440 const jschar *errp = state->cp--;
1441 intN err;
1443 err = ParseMinMaxQuantifier(state, JS_TRUE);
1444 state->cp = errp;
1446 if (err < 0)
1447 goto asFlat;
1449 /* FALL THROUGH */
1451 case '*':
1452 case '+':
1453 case '?':
1454 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1455 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1456 return JS_FALSE;
1457 default:
1458 asFlat:
1459 state->result = NewRENode(state, REOP_FLAT);
1460 if (!state->result)
1461 return JS_FALSE;
1462 state->result->u.flat.chr = c;
1463 state->result->u.flat.length = 1;
1464 state->result->kid = (void *) (state->cp - 1);
1465 state->progLength += 3;
1466 break;
1468 return ParseQuantifier(state);
1471 static JSBool
1472 ParseQuantifier(CompilerState *state)
1474 RENode *term;
1475 term = state->result;
1476 if (state->cp < state->cpend) {
1477 switch (*state->cp) {
1478 case '+':
1479 state->result = NewRENode(state, REOP_QUANT);
1480 if (!state->result)
1481 return JS_FALSE;
1482 state->result->u.range.min = 1;
1483 state->result->u.range.max = (uintN)-1;
1484 /* <PLUS>, <next> ... <ENDCHILD> */
1485 state->progLength += 4;
1486 goto quantifier;
1487 case '*':
1488 state->result = NewRENode(state, REOP_QUANT);
1489 if (!state->result)
1490 return JS_FALSE;
1491 state->result->u.range.min = 0;
1492 state->result->u.range.max = (uintN)-1;
1493 /* <STAR>, <next> ... <ENDCHILD> */
1494 state->progLength += 4;
1495 goto quantifier;
1496 case '?':
1497 state->result = NewRENode(state, REOP_QUANT);
1498 if (!state->result)
1499 return JS_FALSE;
1500 state->result->u.range.min = 0;
1501 state->result->u.range.max = 1;
1502 /* <OPT>, <next> ... <ENDCHILD> */
1503 state->progLength += 4;
1504 goto quantifier;
1505 case '{': /* balance '}' */
1507 intN err;
1508 const jschar *errp = state->cp;
1510 err = ParseMinMaxQuantifier(state, JS_FALSE);
1511 if (err == 0)
1512 goto quantifier;
1513 if (err == -1)
1514 return JS_TRUE;
1516 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1517 return JS_FALSE;
1519 default:;
1522 return JS_TRUE;
1524 quantifier:
1525 if (state->treeDepth == TREE_DEPTH_MAX) {
1526 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1527 return JS_FALSE;
1530 ++state->treeDepth;
1531 ++state->cp;
1532 state->result->kid = term;
1533 if (state->cp < state->cpend && *state->cp == '?') {
1534 ++state->cp;
1535 state->result->u.range.greedy = JS_FALSE;
1536 } else {
1537 state->result->u.range.greedy = JS_TRUE;
1539 return JS_TRUE;
1542 static intN
1543 ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1545 uintN min, max;
1546 jschar c;
1547 const jschar *errp = state->cp++;
1549 c = *state->cp;
1550 if (JS7_ISDEC(c)) {
1551 ++state->cp;
1552 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1553 c = *state->cp;
1555 if (!ignoreValues && min == OVERFLOW_VALUE)
1556 return JSMSG_MIN_TOO_BIG;
1558 if (c == ',') {
1559 c = *++state->cp;
1560 if (JS7_ISDEC(c)) {
1561 ++state->cp;
1562 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1563 c = *state->cp;
1564 if (!ignoreValues && max == OVERFLOW_VALUE)
1565 return JSMSG_MAX_TOO_BIG;
1566 if (!ignoreValues && min > max)
1567 return JSMSG_OUT_OF_ORDER;
1568 } else {
1569 max = (uintN)-1;
1571 } else {
1572 max = min;
1574 if (c == '}') {
1575 state->result = NewRENode(state, REOP_QUANT);
1576 if (!state->result)
1577 return JSMSG_OUT_OF_MEMORY;
1578 state->result->u.range.min = min;
1579 state->result->u.range.max = max;
1581 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1582 * where <max> is written as compact(max+1) to make
1583 * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1585 state->progLength += (1 + GetCompactIndexWidth(min)
1586 + GetCompactIndexWidth(max + 1)
1587 +3);
1588 return 0;
1592 state->cp = errp;
1593 return -1;
1596 static JSBool
1597 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1599 ptrdiff_t offset = target - jump;
1601 /* Check that target really points forward. */
1602 JS_ASSERT(offset >= 2);
1603 if ((size_t)offset > OFFSET_MAX)
1604 return JS_FALSE;
1606 jump[0] = JUMP_OFFSET_HI(offset);
1607 jump[1] = JUMP_OFFSET_LO(offset);
1608 return JS_TRUE;
1611 /* Copy the charset data from a character class node to the charset list
1612 * in the regexp object. */
1613 static JS_ALWAYS_INLINE RECharSet *
1614 InitNodeCharSet(JSRegExp *re, RENode *node)
1616 RECharSet *charSet = &re->classList[node->u.ucclass.index];
1617 charSet->converted = JS_FALSE;
1618 charSet->length = node->u.ucclass.bmsize;
1619 charSet->u.src.startIndex = node->u.ucclass.startIndex;
1620 charSet->u.src.length = node->u.ucclass.kidlen;
1621 charSet->sense = node->u.ucclass.sense;
1622 return charSet;
1626 * Generate bytecode for the tree rooted at t using an explicit stack instead
1627 * of recursion.
1629 static jsbytecode *
1630 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
1631 jsbytecode *pc, RENode *t)
1633 EmitStateStackEntry *emitStateSP, *emitStateStack;
1634 REOp op;
1636 if (treeDepth == 0) {
1637 emitStateStack = NULL;
1638 } else {
1639 emitStateStack =
1640 (EmitStateStackEntry *)JS_malloc(state->context,
1641 sizeof(EmitStateStackEntry) *
1642 treeDepth);
1643 if (!emitStateStack)
1644 return NULL;
1646 emitStateSP = emitStateStack;
1647 op = t->op;
1648 JS_ASSERT(op < REOP_LIMIT);
1650 for (;;) {
1651 *pc++ = op;
1652 switch (op) {
1653 case REOP_EMPTY:
1654 --pc;
1655 break;
1657 case REOP_ALTPREREQ2:
1658 case REOP_ALTPREREQ:
1659 JS_ASSERT(emitStateSP);
1660 emitStateSP->altHead = pc - 1;
1661 emitStateSP->endTermFixup = pc;
1662 pc += OFFSET_LEN;
1663 SET_ARG(pc, t->u.altprereq.ch1);
1664 pc += ARG_LEN;
1665 SET_ARG(pc, t->u.altprereq.ch2);
1666 pc += ARG_LEN;
1668 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1669 pc += OFFSET_LEN;
1671 emitStateSP->continueNode = t;
1672 emitStateSP->continueOp = REOP_JUMP;
1673 emitStateSP->jumpToJumpFlag = JS_FALSE;
1674 ++emitStateSP;
1675 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1676 t = (RENode *) t->kid;
1677 op = t->op;
1678 JS_ASSERT(op < REOP_LIMIT);
1679 continue;
1681 case REOP_JUMP:
1682 emitStateSP->nextTermFixup = pc; /* offset to following term */
1683 pc += OFFSET_LEN;
1684 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
1685 goto jump_too_big;
1686 emitStateSP->continueOp = REOP_ENDALT;
1687 ++emitStateSP;
1688 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1689 t = (RENode *) t->u.kid2;
1690 op = t->op;
1691 JS_ASSERT(op < REOP_LIMIT);
1692 continue;
1694 case REOP_ENDALT:
1696 * If we already patched emitStateSP->nextTermFixup to jump to
1697 * a nearer jump, to avoid 16-bit immediate offset overflow, we
1698 * are done here.
1700 if (emitStateSP->jumpToJumpFlag)
1701 break;
1704 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1705 * REOP_ENDALT is executed only on successful match of the last
1706 * alternate in a group.
1708 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1709 goto jump_too_big;
1710 if (t->op != REOP_ALT) {
1711 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
1712 goto jump_too_big;
1716 * If the program is bigger than the REOP_JUMP offset range, then
1717 * we must check for alternates before this one that are part of
1718 * the same group, and fix up their jump offsets to target jumps
1719 * close enough to fit in a 16-bit unsigned offset immediate.
1721 if ((size_t)(pc - re->program) > OFFSET_MAX &&
1722 emitStateSP > emitStateStack) {
1723 EmitStateStackEntry *esp, *esp2;
1724 jsbytecode *alt, *jump;
1725 ptrdiff_t span, header;
1727 esp2 = emitStateSP;
1728 alt = esp2->altHead;
1729 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
1730 if (esp->continueOp == REOP_ENDALT &&
1731 !esp->jumpToJumpFlag &&
1732 esp->nextTermFixup + OFFSET_LEN == alt &&
1733 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
1734 ? esp->endTermFixup
1735 : esp->nextTermFixup)) > OFFSET_MAX) {
1736 alt = esp->altHead;
1737 jump = esp->nextTermFixup;
1740 * The span must be 1 less than the distance from
1741 * jump offset to jump offset, so we actually jump
1742 * to a REOP_JUMP bytecode, not to its offset!
1744 for (;;) {
1745 JS_ASSERT(jump < esp2->nextTermFixup);
1746 span = esp2->nextTermFixup - jump - 1;
1747 if ((size_t)span <= OFFSET_MAX)
1748 break;
1749 do {
1750 if (--esp2 == esp)
1751 goto jump_too_big;
1752 } while (esp2->continueOp != REOP_ENDALT);
1755 jump[0] = JUMP_OFFSET_HI(span);
1756 jump[1] = JUMP_OFFSET_LO(span);
1758 if (esp->continueNode->op != REOP_ALT) {
1760 * We must patch the offset at esp->endTermFixup
1761 * as well, for the REOP_ALTPREREQ{,2} opcodes.
1762 * If we're unlucky and endTermFixup is more than
1763 * OFFSET_MAX bytes from its target, we cheat by
1764 * jumping 6 bytes to the jump whose offset is at
1765 * esp->nextTermFixup, which has the same target.
1767 jump = esp->endTermFixup;
1768 header = esp->nextTermFixup - jump;
1769 span += header;
1770 if ((size_t)span > OFFSET_MAX)
1771 span = header;
1773 jump[0] = JUMP_OFFSET_HI(span);
1774 jump[1] = JUMP_OFFSET_LO(span);
1777 esp->jumpToJumpFlag = JS_TRUE;
1781 break;
1783 case REOP_ALT:
1784 JS_ASSERT(emitStateSP);
1785 emitStateSP->altHead = pc - 1;
1786 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1787 pc += OFFSET_LEN;
1788 emitStateSP->continueNode = t;
1789 emitStateSP->continueOp = REOP_JUMP;
1790 emitStateSP->jumpToJumpFlag = JS_FALSE;
1791 ++emitStateSP;
1792 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1793 t = (RENode *) t->kid;
1794 op = t->op;
1795 JS_ASSERT(op < REOP_LIMIT);
1796 continue;
1798 case REOP_FLAT:
1800 * Coalesce FLATs if possible and if it would not increase bytecode
1801 * beyond preallocated limit. The latter happens only when bytecode
1802 * size for coalesced string with offset p and length 2 exceeds 6
1803 * bytes preallocated for 2 single char nodes, i.e. when
1804 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1805 * GetCompactIndexWidth(p) > 4.
1806 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1807 * nodes strictly decreases bytecode size, the check has to be
1808 * done only for the first coalescing.
1810 if (t->kid &&
1811 GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
1813 while (t->next &&
1814 t->next->op == REOP_FLAT &&
1815 (jschar*)t->kid + t->u.flat.length ==
1816 (jschar*)t->next->kid) {
1817 t->u.flat.length += t->next->u.flat.length;
1818 t->next = t->next->next;
1821 if (t->kid && t->u.flat.length > 1) {
1822 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
1823 pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
1824 pc = WriteCompactIndex(pc, t->u.flat.length);
1825 } else if (t->u.flat.chr < 256) {
1826 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
1827 *pc++ = (jsbytecode) t->u.flat.chr;
1828 } else {
1829 pc[-1] = (state->flags & JSREG_FOLD)
1830 ? REOP_UCFLAT1i
1831 : REOP_UCFLAT1;
1832 SET_ARG(pc, t->u.flat.chr);
1833 pc += ARG_LEN;
1835 break;
1837 case REOP_LPAREN:
1838 JS_ASSERT(emitStateSP);
1839 pc = WriteCompactIndex(pc, t->u.parenIndex);
1840 emitStateSP->continueNode = t;
1841 emitStateSP->continueOp = REOP_RPAREN;
1842 ++emitStateSP;
1843 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1844 t = (RENode *) t->kid;
1845 op = t->op;
1846 continue;
1848 case REOP_RPAREN:
1849 pc = WriteCompactIndex(pc, t->u.parenIndex);
1850 break;
1852 case REOP_BACKREF:
1853 pc = WriteCompactIndex(pc, t->u.parenIndex);
1854 break;
1856 case REOP_ASSERT:
1857 JS_ASSERT(emitStateSP);
1858 emitStateSP->nextTermFixup = pc;
1859 pc += OFFSET_LEN;
1860 emitStateSP->continueNode = t;
1861 emitStateSP->continueOp = REOP_ASSERTTEST;
1862 ++emitStateSP;
1863 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1864 t = (RENode *) t->kid;
1865 op = t->op;
1866 continue;
1868 case REOP_ASSERTTEST:
1869 case REOP_ASSERTNOTTEST:
1870 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1871 goto jump_too_big;
1872 break;
1874 case REOP_ASSERT_NOT:
1875 JS_ASSERT(emitStateSP);
1876 emitStateSP->nextTermFixup = pc;
1877 pc += OFFSET_LEN;
1878 emitStateSP->continueNode = t;
1879 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1880 ++emitStateSP;
1881 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1882 t = (RENode *) t->kid;
1883 op = t->op;
1884 continue;
1886 case REOP_QUANT:
1887 JS_ASSERT(emitStateSP);
1888 if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
1889 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1890 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
1891 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1892 } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
1893 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1894 } else {
1895 if (!t->u.range.greedy)
1896 pc[-1] = REOP_MINIMALQUANT;
1897 pc = WriteCompactIndex(pc, t->u.range.min);
1899 * Write max + 1 to avoid using size_t(max) + 1 bytes
1900 * for (uintN)-1 sentinel.
1902 pc = WriteCompactIndex(pc, t->u.range.max + 1);
1904 emitStateSP->nextTermFixup = pc;
1905 pc += OFFSET_LEN;
1906 emitStateSP->continueNode = t;
1907 emitStateSP->continueOp = REOP_ENDCHILD;
1908 ++emitStateSP;
1909 JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1910 t = (RENode *) t->kid;
1911 op = t->op;
1912 continue;
1914 case REOP_ENDCHILD:
1915 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1916 goto jump_too_big;
1917 break;
1919 case REOP_CLASS:
1920 if (!t->u.ucclass.sense)
1921 pc[-1] = REOP_NCLASS;
1922 pc = WriteCompactIndex(pc, t->u.ucclass.index);
1923 InitNodeCharSet(re, t);
1924 break;
1926 default:
1927 break;
1930 t = t->next;
1931 if (t) {
1932 op = t->op;
1933 } else {
1934 if (emitStateSP == emitStateStack)
1935 break;
1936 --emitStateSP;
1937 t = emitStateSP->continueNode;
1938 op = (REOp) emitStateSP->continueOp;
1942 cleanup:
1943 if (emitStateStack)
1944 JS_free(state->context, emitStateStack);
1945 return pc;
1947 jump_too_big:
1948 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1949 pc = NULL;
1950 goto cleanup;
1953 static JSBool
1954 CompileRegExpToAST(JSContext* cx, JSTokenStream* ts,
1955 JSString* str, uintN flags, CompilerState& state)
1957 uintN i;
1958 size_t len;
1960 len = JSSTRING_LENGTH(str);
1962 state.context = cx;
1963 state.tokenStream = ts;
1964 state.cp = js_UndependString(cx, str);
1965 if (!state.cp)
1966 return JS_FALSE;
1967 state.cpbegin = state.cp;
1968 state.cpend = state.cp + len;
1969 state.flags = flags;
1970 state.parenCount = 0;
1971 state.classCount = 0;
1972 state.progLength = 0;
1973 state.treeDepth = 0;
1974 state.classBitmapsMem = 0;
1975 for (i = 0; i < CLASS_CACHE_SIZE; i++)
1976 state.classCache[i].start = NULL;
1978 if (len != 0 && (flags & JSREG_FLAT)) {
1979 state.result = NewRENode(&state, REOP_FLAT);
1980 if (!state.result)
1981 return JS_FALSE;
1982 state.result->u.flat.chr = *state.cpbegin;
1983 state.result->u.flat.length = len;
1984 state.result->kid = (void *) state.cpbegin;
1985 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1986 state.progLength += 1 + GetCompactIndexWidth(0)
1987 + GetCompactIndexWidth(len);
1988 return JS_TRUE;
1991 return ParseRegExp(&state);
1994 #ifdef JS_TRACER
1995 typedef List<LIns*, LIST_NonGCObjects> LInsList;
1997 /* Dummy GC for nanojit placement new. */
1998 static GC gc;
2000 static void*
2001 HashRegExp(uint16 flags, jschar* s, size_t n)
2003 uint32 h;
2005 for (h = 0; n; s++, n--)
2006 h = JS_ROTATE_LEFT32(h, 4) ^ *s;
2007 return (void*)(h + flags);
2010 struct RESideExit : public SideExit {
2011 size_t re_length;
2012 uint16 re_flags;
2013 jschar re_chars[1];
2016 /* Return the cached fragment for the given regexp, or NULL. */
2017 static Fragment*
2018 LookupNativeRegExp(JSContext* cx, void* hash, uint16 re_flags,
2019 jschar* re_chars, size_t re_length)
2021 Fragmento* fragmento = JS_TRACE_MONITOR(cx).reFragmento;
2022 Fragment* fragment = fragmento->getLoop(hash);
2023 while (fragment) {
2024 if (fragment->lastIns) {
2025 RESideExit* exit = (RESideExit*)fragment->lastIns->record()->exit;
2026 if (exit->re_flags == re_flags &&
2027 exit->re_length == re_length &&
2028 !memcmp(exit->re_chars, re_chars, re_length)) {
2029 return fragment;
2032 fragment = fragment->peer;
2034 return NULL;
2037 static JSBool
2038 ProcessCharSet(JSContext *cx, JSRegExp *re, RECharSet *charSet);
2040 class RegExpNativeCompiler {
2041 private:
2042 JSContext* cx;
2043 JSRegExp* re;
2044 CompilerState* cs; /* RegExp to compile */
2045 Fragment* fragment;
2046 LirBuffer* lirbuf;
2047 LirWriter* lir;
2048 LirBufWriter* lirBufWriter; /* for skip */
2050 LIns* state;
2051 LIns* gdata;
2052 LIns* cpend;
2054 JSBool isCaseInsensitive() const { return cs->flags & JSREG_FOLD; }
2056 JSBool targetCurrentPoint(LIns* ins)
2058 if (fragment->lirbuf->outOMem())
2059 return JS_FALSE;
2060 ins->target(lir->ins0(LIR_label));
2061 return JS_TRUE;
2064 JSBool targetCurrentPoint(LInsList& fails)
2066 if (fragment->lirbuf->outOMem())
2067 return JS_FALSE;
2068 LIns* fail = lir->ins0(LIR_label);
2069 for (size_t i = 0; i < fails.size(); ++i) {
2070 fails[i]->target(fail);
2072 fails.clear();
2073 return JS_TRUE;
2077 * These functions return the new position after their match operation,
2078 * or NULL if there was an error.
2080 LIns* compileEmpty(RENode* node, LIns* pos, LInsList& fails)
2082 return pos;
2085 LIns* compileFlatSingleChar(jschar ch, LIns* pos, LInsList& fails)
2088 * Fast case-insensitive test for ASCII letters: convert text
2089 * char to lower case by bit-or-ing in 32 and compare.
2091 JSBool useFastCI = JS_FALSE;
2092 jschar ch2 = ch; /* 2nd char to test for if ci */
2093 if (cs->flags & JSREG_FOLD) {
2094 if ((L'A' <= ch && ch <= L'Z') || (L'a' <= ch && ch <= L'z')) {
2095 ch |= 32;
2096 ch2 = ch;
2097 useFastCI = JS_TRUE;
2098 } else if (JS_TOLOWER(ch) != ch) {
2099 ch2 = JS_TOLOWER(ch);
2100 ch = JS_TOUPPER(ch);
2104 LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_lt, pos, cpend), 0);
2105 fails.add(to_fail);
2106 LIns* text_ch = lir->insLoad(LIR_ldcs, pos, lir->insImm(0));
2107 LIns* comp_ch = useFastCI ?
2108 lir->ins2(LIR_or, text_ch, lir->insImm(32)) :
2109 text_ch;
2110 if (ch == ch2) {
2111 fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_eq, comp_ch, lir->insImm(ch)), 0));
2112 } else {
2113 LIns* to_ok = lir->insBranch(LIR_jt, lir->ins2(LIR_eq, comp_ch, lir->insImm(ch)), 0);
2114 fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_eq, comp_ch, lir->insImm(ch2)), 0));
2115 if (!targetCurrentPoint(to_ok))
2116 return NULL;
2119 return lir->ins2(LIR_piadd, pos, lir->insImm(2));
2122 LIns* compileClass(RENode* node, LIns* pos, LInsList& fails)
2124 if (!node->u.ucclass.sense)
2125 return JS_FALSE;
2127 * If we share generated native code, we need to make a copy
2128 * of the bitmap because the original regexp's copy is destroyed
2129 * when that regexp is.
2131 RECharSet *charSet = &re->classList[node->u.ucclass.index];
2132 size_t bitmapLen = (charSet->length >> 3) + 1;
2133 /* skip() can't hold large data blocks. */
2134 if (bitmapLen > 1024)
2135 return NULL;
2136 /* The following line allocates charSet.u.bits if successful. */
2137 if (!charSet->converted && !ProcessCharSet(cx, re, charSet))
2138 return NULL;
2139 LIns* skip = lirBufWriter->skip(bitmapLen);
2140 if (fragment->lirbuf->outOMem())
2141 return NULL;
2142 void* bitmapData = skip->payload();
2143 memcpy(bitmapData, charSet->u.bits, bitmapLen);
2145 LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_lt, pos, cpend), 0);
2146 fails.add(to_fail);
2147 LIns* text_ch = lir->insLoad(LIR_ldcs, pos, lir->insImm(0));
2148 fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_le, text_ch, lir->insImm(charSet->length)), 0));
2149 LIns* byteIndex = lir->ins2(LIR_rsh, text_ch, lir->insImm(3));
2150 LIns* bitmap = lir->insImmPtr(bitmapData);
2151 LIns* byte = lir->insLoad(LIR_ldcb, lir->ins2(LIR_piadd, bitmap, byteIndex), (int) 0);
2152 LIns* bitMask = lir->ins2(LIR_lsh, lir->insImm(1),
2153 lir->ins2(LIR_and, text_ch, lir->insImm(0x7)));
2154 LIns* test = lir->ins2(LIR_eq, lir->ins2(LIR_and, byte, bitMask), lir->insImm(0));
2156 LIns* to_next = lir->insBranch(LIR_jt, test, 0);
2157 fails.add(to_next);
2158 return lir->ins2(LIR_piadd, pos, lir->insImm(2));
2161 LIns* compileAlt(RENode* node, LIns* pos, LInsList& fails)
2163 LInsList kidFails(NULL);
2164 if (!compileNode((RENode *) node->kid, pos, kidFails))
2165 return NULL;
2166 if (!compileNode(node->next, pos, kidFails))
2167 return NULL;
2169 if (!targetCurrentPoint(kidFails))
2170 return NULL;
2171 if (!compileNode(node->u.altprereq.kid2, pos, fails))
2172 return NULL;
2174 * Disable compilation for any regexp where something follows an
2175 * alternation. To make this work, we need to redesign to either
2176 * (a) pass around continuations so we know the right place to go
2177 * when we logically return, or (b) generate explicit backtracking
2178 * code.
2180 if (node->next)
2181 return NULL;
2182 return pos;
2185 JSBool compileNode(RENode* node, LIns* pos, LInsList& fails)
2187 for (; node; node = node->next) {
2188 if (fragment->lirbuf->outOMem())
2189 return JS_FALSE;
2191 switch (node->op) {
2192 case REOP_EMPTY:
2193 pos = compileEmpty(node, pos, fails);
2194 break;
2195 case REOP_FLAT:
2196 if (node->u.flat.length == 1) {
2197 pos = compileFlatSingleChar(node->u.flat.chr, pos, fails);
2198 } else {
2199 for (size_t i = 0; i < node->u.flat.length; ++i) {
2200 if (fragment->lirbuf->outOMem())
2201 return JS_FALSE;
2202 pos = compileFlatSingleChar(((jschar*) node->kid)[i], pos, fails);
2203 if (!pos) break;
2206 break;
2207 case REOP_ALT:
2208 case REOP_ALTPREREQ:
2209 pos = compileAlt(node, pos, fails);
2210 break;
2211 case REOP_CLASS:
2212 pos = compileClass(node, pos, fails);
2213 break;
2214 default:
2215 return JS_FALSE;
2217 if (!pos)
2218 return JS_FALSE;
2221 lir->insStorei(pos, state, (int) offsetof(REMatchState, cp));
2222 lir->ins1(LIR_ret, state);
2223 return JS_TRUE;
2226 JSBool compileSticky(RENode* root, LIns* start)
2228 LInsList fails(NULL);
2229 if (!compileNode(root, start, fails))
2230 return JS_FALSE;
2231 if (!targetCurrentPoint(fails))
2232 return JS_FALSE;
2233 lir->ins1(LIR_ret, lir->insImm(0));
2234 return JS_TRUE;
2237 JSBool compileAnchoring(RENode* root, LIns* start)
2239 /* Even at the end, the empty regexp would match. */
2240 LIns* to_next = lir->insBranch(LIR_jf,
2241 lir->ins2(LIR_le, start, cpend), 0);
2242 LInsList fails(NULL);
2243 if (!compileNode(root, start, fails))
2244 return JS_FALSE;
2246 if (!targetCurrentPoint(to_next))
2247 return JS_FALSE;
2248 lir->ins1(LIR_ret, lir->insImm(0));
2250 if (!targetCurrentPoint(fails))
2251 return JS_FALSE;
2252 lir->insStorei(lir->ins2(LIR_piadd, start, lir->insImm(2)), gdata,
2253 (int) offsetof(REGlobalData, skipped));
2255 return JS_TRUE;
2258 inline LIns*
2259 addName(LirBuffer* lirbuf, LIns* ins, const char* name)
2261 #ifdef NJ_VERBOSE
2262 debug_only_v(lirbuf->names->addName(ins, name);)
2263 #endif
2264 return ins;
2268 * Insert the side exit and guard record for a compiled regexp. Most
2269 * of the fields are not used. The important part is the regexp source
2270 * and flags, which we use as the fragment lookup key.
2272 GuardRecord* insertGuard(jschar* re_chars, size_t re_length)
2274 LIns* skip = lirBufWriter->skip(sizeof(GuardRecord) +
2275 sizeof(RESideExit) +
2276 re_length - sizeof(jschar));
2277 GuardRecord* guard = (GuardRecord *) skip->payload();
2278 memset(guard, 0, sizeof(*guard));
2279 RESideExit* exit = (RESideExit*)(guard+1);
2280 guard->exit = exit;
2281 guard->exit->target = fragment;
2282 exit->re_flags = re->flags;
2283 exit->re_length = re_length;
2284 memcpy(exit->re_chars, re_chars, re_length);
2285 fragment->lastIns = lir->insGuard(LIR_loop, lir->insImm(1), skip);
2286 return guard;
2289 public:
2290 RegExpNativeCompiler(JSRegExp* re, CompilerState* cs, Fragment* fragment)
2291 : re(re), cs(cs), fragment(fragment), lir(NULL), lirBufWriter(NULL) { }
2293 JSBool compile(JSContext* cx)
2295 GuardRecord* guard = NULL;
2296 LIns* start;
2297 bool oom = false;
2298 jschar* re_chars;
2299 size_t re_length;
2300 Fragmento* fragmento = JS_TRACE_MONITOR(cx).reFragmento;
2302 JSSTRING_CHARS_AND_LENGTH(re->source, re_chars, re_length);
2304 * If the regexp is too long nanojit will assert when we
2305 * try to insert the guard record.
2307 if (re_length > 1024)
2308 return JS_FALSE;
2310 this->cx = cx;
2311 /* At this point we have an empty fragment. */
2312 LirBuffer* lirbuf = fragment->lirbuf;
2313 if (lirbuf->outOMem())
2314 goto fail;
2315 /* FIXME Use bug 463260 smart pointer when available. */
2316 lir = lirBufWriter = new (&gc) LirBufWriter(lirbuf);
2318 /* FIXME Use bug 463260 smart pointer when available. */
2319 #ifdef NJ_VERBOSE
2320 debug_only_v(fragment->lirbuf->names = new (&gc) LirNameMap(&gc, NULL, fragmento->labels);)
2321 #endif
2322 /* FIXME Use bug 463260 smart pointer when available. */
2323 #ifdef NJ_VERBOSE
2324 debug_only_v(lir = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);)
2325 #endif
2327 lir->ins0(LIR_start);
2328 lirbuf->state = state = addName(lirbuf, lir->insParam(0, 0), "state");
2329 lirbuf->param1 = gdata = addName(lirbuf, lir->insParam(1, 0), "gdata");
2330 start = addName(lirbuf, lir->insLoad(LIR_ldp, lirbuf->param1, (int) offsetof(REGlobalData, skipped)), "start");
2331 cpend = addName(lirbuf, lir->insLoad(LIR_ldp, lirbuf->param1, offsetof(REGlobalData, cpend)), "cpend");
2333 if (cs->flags & JSREG_STICKY) {
2334 if (!compileSticky(cs->result, start))
2335 goto fail;
2336 } else {
2337 if (!compileAnchoring(cs->result, start))
2338 goto fail;
2341 guard = insertGuard(re_chars, re_length);
2343 if (lirbuf->outOMem())
2344 goto fail;
2345 ::compile(fragmento->assm(), fragment);
2346 if (fragmento->assm()->error() != nanojit::None) {
2347 oom = fragmento->assm()->error() == nanojit::OutOMem;
2348 goto fail;
2351 delete lirBufWriter;
2352 #ifdef NJ_VERBOSE
2353 debug_only_v(delete lir;)
2354 #endif
2355 return JS_TRUE;
2356 fail:
2357 if (lirbuf->outOMem() || oom) {
2358 fragmento->clearFrags();
2359 lirbuf->rewind();
2360 } else {
2361 if (!guard) insertGuard(re_chars, re_length);
2362 fragment->blacklist();
2364 delete lirBufWriter;
2365 #ifdef NJ_VERBOSE
2366 debug_only_v(delete lir;)
2367 #endif
2368 return JS_FALSE;
2373 * Compile a regexp to native code in the given fragment.
2375 static inline JSBool
2376 CompileRegExpToNative(JSContext* cx, JSRegExp* re, Fragment* fragment)
2378 JSBool rv = JS_FALSE;
2379 void* mark;
2380 CompilerState state;
2381 RegExpNativeCompiler rc(re, &state, fragment);
2383 JS_ASSERT(!fragment->code());
2384 JS_ASSERT(!fragment->isBlacklisted());
2386 mark = JS_ARENA_MARK(&cx->tempPool);
2387 if (!CompileRegExpToAST(cx, NULL, re->source, re->flags, state)) {
2388 goto out;
2390 rv = rc.compile(cx);
2391 out:
2392 JS_ARENA_RELEASE(&cx->tempPool, mark);
2393 return rv;
2396 /* Function type for a compiled native regexp. */
2397 typedef REMatchState* (FASTCALL *NativeRegExp)(REMatchState*, REGlobalData*);
2400 * Return a compiled native regexp if one already exists or can be created
2401 * now, or NULL otherwise.
2403 static NativeRegExp
2404 GetNativeRegExp(JSContext* cx, JSRegExp* re)
2406 Fragment *fragment;
2407 jschar* re_chars;
2408 size_t re_length;
2409 Fragmento* fragmento = JS_TRACE_MONITOR(cx).reFragmento;
2411 JSSTRING_CHARS_AND_LENGTH(re->source, re_chars, re_length);
2412 void* hash = HashRegExp(re->flags, re_chars, re_length);
2413 fragment = LookupNativeRegExp(cx, hash, re->flags, re_chars, re_length);
2414 if (fragment) {
2415 if (fragment->code())
2416 goto ok;
2417 if (fragment->isBlacklisted())
2418 return NULL;
2419 } else {
2420 fragment = fragmento->getAnchor(hash);
2421 fragment->lirbuf = JS_TRACE_MONITOR(cx).reLirBuf;
2422 fragment->root = fragment;
2425 if (!CompileRegExpToNative(cx, re, fragment))
2426 return NULL;
2428 union { NIns *code; NativeRegExp func; } u;
2429 u.code = fragment->code();
2430 return u.func;
2432 #endif
2434 JSRegExp *
2435 js_NewRegExp(JSContext *cx, JSTokenStream *ts,
2436 JSString *str, uintN flags, JSBool flat)
2438 JSRegExp *re;
2439 void *mark;
2440 CompilerState state;
2441 size_t resize;
2442 jsbytecode *endPC;
2443 uintN i;
2445 re = NULL;
2446 mark = JS_ARENA_MARK(&cx->tempPool);
2449 * Parsing the string as flat is now expressed internally using
2450 * a flag, so that we keep this information in the JSRegExp, but
2451 * we keep the 'flat' parameter for now for compatibility.
2453 if (flat) flags |= JSREG_FLAT;
2454 if (!CompileRegExpToAST(cx, ts, str, flags, state))
2455 goto out;
2457 resize = offsetof(JSRegExp, program) + state.progLength + 1;
2458 re = (JSRegExp *) JS_malloc(cx, resize);
2459 if (!re)
2460 goto out;
2462 re->nrefs = 1;
2463 JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
2464 re->classCount = state.classCount;
2465 if (re->classCount) {
2466 re->classList = (RECharSet *)
2467 JS_malloc(cx, re->classCount * sizeof(RECharSet));
2468 if (!re->classList) {
2469 js_DestroyRegExp(cx, re);
2470 re = NULL;
2471 goto out;
2473 for (i = 0; i < re->classCount; i++)
2474 re->classList[i].converted = JS_FALSE;
2475 } else {
2476 re->classList = NULL;
2479 /* Compile the bytecode version. */
2480 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
2481 if (!endPC) {
2482 js_DestroyRegExp(cx, re);
2483 re = NULL;
2484 goto out;
2486 *endPC++ = REOP_END;
2488 * Check whether size was overestimated and shrink using realloc.
2489 * This is safe since no pointers to newly parsed regexp or its parts
2490 * besides re exist here.
2492 if ((size_t)(endPC - re->program) != state.progLength + 1) {
2493 JSRegExp *tmp;
2494 JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
2495 resize = offsetof(JSRegExp, program) + (endPC - re->program);
2496 tmp = (JSRegExp *) JS_realloc(cx, re, resize);
2497 if (tmp)
2498 re = tmp;
2501 re->flags = flags;
2502 re->parenCount = state.parenCount;
2503 re->source = str;
2505 out:
2506 JS_ARENA_RELEASE(&cx->tempPool, mark);
2507 return re;
2510 JSRegExp *
2511 js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt, JSBool flat)
2513 uintN flags;
2514 jschar *s;
2515 size_t i, n;
2516 char charBuf[2];
2518 flags = 0;
2519 if (opt) {
2520 JSSTRING_CHARS_AND_LENGTH(opt, s, n);
2521 for (i = 0; i < n; i++) {
2522 switch (s[i]) {
2523 case 'g':
2524 flags |= JSREG_GLOB;
2525 break;
2526 case 'i':
2527 flags |= JSREG_FOLD;
2528 break;
2529 case 'm':
2530 flags |= JSREG_MULTILINE;
2531 break;
2532 case 'y':
2533 flags |= JSREG_STICKY;
2534 break;
2535 default:
2536 charBuf[0] = (char)s[i];
2537 charBuf[1] = '\0';
2538 JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
2539 js_GetErrorMessage, NULL,
2540 JSMSG_BAD_FLAG, charBuf);
2541 return NULL;
2545 return js_NewRegExp(cx, NULL, str, flags, flat);
2549 * Save the current state of the match - the position in the input
2550 * text as well as the position in the bytecode. The state of any
2551 * parent expressions is also saved (preceding state).
2552 * Contents of parenCount parentheses from parenIndex are also saved.
2554 static REBackTrackData *
2555 PushBackTrackState(REGlobalData *gData, REOp op,
2556 jsbytecode *target, REMatchState *x, const jschar *cp,
2557 size_t parenIndex, size_t parenCount)
2559 size_t i;
2560 REBackTrackData *result =
2561 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
2563 size_t sz = sizeof(REBackTrackData) +
2564 gData->stateStackTop * sizeof(REProgState) +
2565 parenCount * sizeof(RECapture);
2567 ptrdiff_t btsize = gData->backTrackStackSize;
2568 ptrdiff_t btincr = ((char *)result + sz) -
2569 ((char *)gData->backTrackStack + btsize);
2571 re_debug("\tBT_Push: %lu,%lu",
2572 (unsigned long) parenIndex, (unsigned long) parenCount);
2574 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
2575 if (btincr > 0) {
2576 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
2578 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
2579 btincr = JS_ROUNDUP(btincr, btsize);
2580 JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
2581 &gData->cx->regexpPool, btsize, btincr);
2582 if (!gData->backTrackStack) {
2583 js_ReportOutOfScriptQuota(gData->cx);
2584 gData->ok = JS_FALSE;
2585 return NULL;
2587 gData->backTrackStackSize = btsize + btincr;
2588 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2590 gData->backTrackSP = result;
2591 result->sz = gData->cursz;
2592 gData->cursz = sz;
2594 result->backtrack_op = op;
2595 result->backtrack_pc = target;
2596 result->cp = cp;
2597 result->parenCount = parenCount;
2598 result->parenIndex = parenIndex;
2600 result->saveStateStackTop = gData->stateStackTop;
2601 JS_ASSERT(gData->stateStackTop);
2602 memcpy(result + 1, gData->stateStack,
2603 sizeof(REProgState) * result->saveStateStackTop);
2605 if (parenCount != 0) {
2606 memcpy((char *)(result + 1) +
2607 sizeof(REProgState) * result->saveStateStackTop,
2608 &x->parens[parenIndex],
2609 sizeof(RECapture) * parenCount);
2610 for (i = 0; i != parenCount; i++)
2611 x->parens[parenIndex + i].index = -1;
2614 return result;
2619 * Consecutive literal characters.
2621 #if 0
2622 static REMatchState *
2623 FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2624 size_t length)
2626 size_t i;
2627 if (length > gData->cpend - x->cp)
2628 return NULL;
2629 for (i = 0; i != length; i++) {
2630 if (matchChars[i] != x->cp[i])
2631 return NULL;
2633 x->cp += length;
2634 return x;
2636 #endif
2638 static JS_ALWAYS_INLINE REMatchState *
2639 FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2640 size_t length)
2642 size_t i;
2643 JS_ASSERT(gData->cpend >= x->cp);
2644 if (length > (size_t)(gData->cpend - x->cp))
2645 return NULL;
2646 for (i = 0; i != length; i++) {
2647 if (upcase(matchChars[i]) != upcase(x->cp[i]))
2648 return NULL;
2650 x->cp += length;
2651 return x;
2655 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2656 * 2. If E is not a character then go to step 6.
2657 * 3. Let ch be E's character.
2658 * 4. Let A be a one-element RECharSet containing the character ch.
2659 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2660 * 6. E must be an integer. Let n be that integer.
2661 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2662 * 8. Return an internal Matcher closure that takes two arguments, a State x
2663 * and a Continuation c, and performs the following:
2664 * 1. Let cap be x's captures internal array.
2665 * 2. Let s be cap[n].
2666 * 3. If s is undefined, then call c(x) and return its result.
2667 * 4. Let e be x's endIndex.
2668 * 5. Let len be s's length.
2669 * 6. Let f be e+len.
2670 * 7. If f>InputLength, return failure.
2671 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2672 * such that Canonicalize(s[i]) is not the same character as
2673 * Canonicalize(Input [e+i]), then return failure.
2674 * 9. Let y be the State (f, cap).
2675 * 10. Call c(y) and return its result.
2677 static REMatchState *
2678 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2680 size_t len, i;
2681 const jschar *parenContent;
2682 RECapture *cap = &x->parens[parenIndex];
2684 if (cap->index == -1)
2685 return x;
2687 len = cap->length;
2688 if (x->cp + len > gData->cpend)
2689 return NULL;
2691 parenContent = &gData->cpbegin[cap->index];
2692 if (gData->regexp->flags & JSREG_FOLD) {
2693 for (i = 0; i < len; i++) {
2694 if (upcase(parenContent[i]) != upcase(x->cp[i]))
2695 return NULL;
2697 } else {
2698 for (i = 0; i < len; i++) {
2699 if (parenContent[i] != x->cp[i])
2700 return NULL;
2703 x->cp += len;
2704 return x;
2708 /* Add a single character to the RECharSet */
2709 static void
2710 AddCharacterToCharSet(RECharSet *cs, jschar c)
2712 uintN byteIndex = (uintN)(c >> 3);
2713 JS_ASSERT(c <= cs->length);
2714 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2718 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2719 static void
2720 AddCharacterRangeToCharSet(RECharSet *cs, uintN c1, uintN c2)
2722 uintN i;
2724 uintN byteIndex1 = c1 >> 3;
2725 uintN byteIndex2 = c2 >> 3;
2727 JS_ASSERT(c2 <= cs->length && c1 <= c2);
2729 c1 &= 0x7;
2730 c2 &= 0x7;
2732 if (byteIndex1 == byteIndex2) {
2733 cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
2734 } else {
2735 cs->u.bits[byteIndex1] |= 0xFF << c1;
2736 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2737 cs->u.bits[i] = 0xFF;
2738 cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
2742 struct CharacterRange {
2743 jschar start;
2744 jschar end;
2748 * The following characters are taken from the ECMA-262 standard, section 7.2
2749 * and 7.3, and the Unicode 3 standard, Table 6-1.
2751 static const CharacterRange WhiteSpaceRanges[] = {
2752 /* TAB, LF, VT, FF, CR */
2753 { 0x0009, 0x000D },
2754 /* SPACE */
2755 { 0x0020, 0x0020 },
2756 /* NO-BREAK SPACE */
2757 { 0x00A0, 0x00A0 },
2759 * EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM
2760 * SPACE, SIX-PER-EM SPACE, FIGURE SPACE, PUNCTUATION SPACE, THIN SPACE,
2761 * HAIR SPACE, ZERO WIDTH SPACE
2763 { 0x2000, 0x200B },
2764 /* LS, PS */
2765 { 0x2028, 0x2029 },
2766 /* NARROW NO-BREAK SPACE */
2767 { 0x202F, 0x202F },
2768 /* IDEOGRAPHIC SPACE */
2769 { 0x3000, 0x3000 }
2772 /* ECMA-262 standard, section 15.10.2.6. */
2773 static const CharacterRange WordRanges[] = {
2774 { jschar('0'), jschar('9') },
2775 { jschar('A'), jschar('Z') },
2776 { jschar('_'), jschar('_') },
2777 { jschar('a'), jschar('z') }
2780 static void
2781 AddCharacterRanges(RECharSet *charSet,
2782 const CharacterRange *range,
2783 const CharacterRange *end)
2785 for (; range < end; ++range)
2786 AddCharacterRangeToCharSet(charSet, range->start, range->end);
2789 static void
2790 AddInvertedCharacterRanges(RECharSet *charSet,
2791 const CharacterRange *range,
2792 const CharacterRange *end)
2794 uint16 previous = 0;
2795 for (; range < end; ++range) {
2796 AddCharacterRangeToCharSet(charSet, previous, range->start - 1);
2797 previous = range->end + 1;
2799 AddCharacterRangeToCharSet(charSet, previous, charSet->length);
2802 /* Compile the source of the class into a RECharSet */
2803 static JSBool
2804 ProcessCharSet(JSContext *cx, JSRegExp *re, RECharSet *charSet)
2806 const jschar *src, *end;
2807 JSBool inRange = JS_FALSE;
2808 jschar rangeStart = 0;
2809 uintN byteLength, n;
2810 jschar c, thisCh;
2811 intN nDigits, i;
2813 JS_ASSERT(!charSet->converted);
2815 * Assert that startIndex and length points to chars inside [] inside
2816 * source string.
2818 JS_ASSERT(1 <= charSet->u.src.startIndex);
2819 JS_ASSERT(charSet->u.src.startIndex
2820 < JSSTRING_LENGTH(re->source));
2821 JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(re->source)
2822 - 1 - charSet->u.src.startIndex);
2824 charSet->converted = JS_TRUE;
2825 src = JSSTRING_CHARS(re->source) + charSet->u.src.startIndex;
2826 end = src + charSet->u.src.length;
2827 JS_ASSERT(src[-1] == '[');
2828 JS_ASSERT(end[0] == ']');
2830 byteLength = (charSet->length >> 3) + 1;
2831 charSet->u.bits = (uint8 *)JS_malloc(cx, byteLength);
2832 if (!charSet->u.bits) {
2833 JS_ReportOutOfMemory(cx);
2834 return JS_FALSE;
2836 memset(charSet->u.bits, 0, byteLength);
2838 if (src == end)
2839 return JS_TRUE;
2841 if (*src == '^') {
2842 JS_ASSERT(charSet->sense == JS_FALSE);
2843 ++src;
2844 } else {
2845 JS_ASSERT(charSet->sense == JS_TRUE);
2848 while (src != end) {
2849 switch (*src) {
2850 case '\\':
2851 ++src;
2852 c = *src++;
2853 switch (c) {
2854 case 'b':
2855 thisCh = 0x8;
2856 break;
2857 case 'f':
2858 thisCh = 0xC;
2859 break;
2860 case 'n':
2861 thisCh = 0xA;
2862 break;
2863 case 'r':
2864 thisCh = 0xD;
2865 break;
2866 case 't':
2867 thisCh = 0x9;
2868 break;
2869 case 'v':
2870 thisCh = 0xB;
2871 break;
2872 case 'c':
2873 if (src < end && JS_ISWORD(*src)) {
2874 thisCh = (jschar)(*src++ & 0x1F);
2875 } else {
2876 --src;
2877 thisCh = '\\';
2879 break;
2880 case 'x':
2881 nDigits = 2;
2882 goto lexHex;
2883 case 'u':
2884 nDigits = 4;
2885 lexHex:
2886 n = 0;
2887 for (i = 0; (i < nDigits) && (src < end); i++) {
2888 uintN digit;
2889 c = *src++;
2890 if (!isASCIIHexDigit(c, &digit)) {
2892 * Back off to accepting the original '\'
2893 * as a literal
2895 src -= i + 1;
2896 n = '\\';
2897 break;
2899 n = (n << 4) | digit;
2901 thisCh = (jschar)n;
2902 break;
2903 case '0':
2904 case '1':
2905 case '2':
2906 case '3':
2907 case '4':
2908 case '5':
2909 case '6':
2910 case '7':
2912 * This is a non-ECMA extension - decimal escapes (in this
2913 * case, octal!) are supposed to be an error inside class
2914 * ranges, but supported here for backwards compatibility.
2916 n = JS7_UNDEC(c);
2917 c = *src;
2918 if ('0' <= c && c <= '7') {
2919 src++;
2920 n = 8 * n + JS7_UNDEC(c);
2921 c = *src;
2922 if ('0' <= c && c <= '7') {
2923 src++;
2924 i = 8 * n + JS7_UNDEC(c);
2925 if (i <= 0377)
2926 n = i;
2927 else
2928 src--;
2931 thisCh = (jschar)n;
2932 break;
2934 case 'd':
2935 AddCharacterRangeToCharSet(charSet, '0', '9');
2936 continue; /* don't need range processing */
2937 case 'D':
2938 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2939 AddCharacterRangeToCharSet(charSet,
2940 (jschar)('9' + 1),
2941 (jschar)charSet->length);
2942 continue;
2943 case 's':
2944 AddCharacterRanges(charSet, WhiteSpaceRanges,
2945 WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
2946 continue;
2947 case 'S':
2948 AddInvertedCharacterRanges(charSet, WhiteSpaceRanges,
2949 WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges));
2950 continue;
2951 case 'w':
2952 AddCharacterRanges(charSet, WordRanges,
2953 WordRanges + JS_ARRAY_LENGTH(WordRanges));
2954 continue;
2955 case 'W':
2956 AddInvertedCharacterRanges(charSet, WordRanges,
2957 WordRanges + JS_ARRAY_LENGTH(WordRanges));
2958 continue;
2959 default:
2960 thisCh = c;
2961 break;
2964 break;
2966 default:
2967 thisCh = *src++;
2968 break;
2971 if (inRange) {
2972 if (re->flags & JSREG_FOLD) {
2973 int i;
2975 JS_ASSERT(rangeStart <= thisCh);
2976 for (i = rangeStart; i <= thisCh; i++) {
2977 jschar uch, dch;
2979 AddCharacterToCharSet(charSet, i);
2980 uch = upcase(i);
2981 dch = downcase(i);
2982 if (i != uch)
2983 AddCharacterToCharSet(charSet, uch);
2984 if (i != dch)
2985 AddCharacterToCharSet(charSet, dch);
2987 } else {
2988 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2990 inRange = JS_FALSE;
2991 } else {
2992 if (re->flags & JSREG_FOLD) {
2993 AddCharacterToCharSet(charSet, upcase(thisCh));
2994 AddCharacterToCharSet(charSet, downcase(thisCh));
2995 } else {
2996 AddCharacterToCharSet(charSet, thisCh);
2998 if (src < end - 1) {
2999 if (*src == '-') {
3000 ++src;
3001 inRange = JS_TRUE;
3002 rangeStart = thisCh;
3007 return JS_TRUE;
3010 static inline JSBool
3011 MatcherProcessCharSet(REGlobalData *gData, RECharSet *charSet) {
3012 JSBool rv = ProcessCharSet(gData->cx, gData->regexp, charSet);
3013 if (!rv) gData->ok = JS_FALSE;
3014 return rv;
3017 void
3018 js_DestroyRegExp(JSContext *cx, JSRegExp *re)
3020 if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
3021 if (re->classList) {
3022 uintN i;
3023 for (i = 0; i < re->classCount; i++) {
3024 if (re->classList[i].converted)
3025 JS_free(cx, re->classList[i].u.bits);
3026 re->classList[i].u.bits = NULL;
3028 JS_free(cx, re->classList);
3030 JS_free(cx, re);
3034 static JSBool
3035 ReallocStateStack(REGlobalData *gData)
3037 size_t limit = gData->stateStackLimit;
3038 size_t sz = sizeof(REProgState) * limit;
3040 JS_ARENA_GROW_CAST(gData->stateStack, REProgState *,
3041 &gData->cx->regexpPool, sz, sz);
3042 if (!gData->stateStack) {
3043 js_ReportOutOfScriptQuota(gData->cx);
3044 gData->ok = JS_FALSE;
3045 return JS_FALSE;
3047 gData->stateStackLimit = limit + limit;
3048 return JS_TRUE;
3051 #define PUSH_STATE_STACK(data) \
3052 JS_BEGIN_MACRO \
3053 ++(data)->stateStackTop; \
3054 if ((data)->stateStackTop == (data)->stateStackLimit && \
3055 !ReallocStateStack((data))) { \
3056 return NULL; \
3058 JS_END_MACRO
3061 * Apply the current op against the given input to see if it's going to match
3062 * or fail. Return false if we don't get a match, true if we do. If updatecp is
3063 * true, then update the current state's cp. Always update startpc to the next
3064 * op.
3066 static JS_ALWAYS_INLINE REMatchState *
3067 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
3068 jsbytecode **startpc, JSBool updatecp)
3070 REMatchState *result = NULL;
3071 jschar matchCh;
3072 size_t parenIndex;
3073 size_t offset, length, index;
3074 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
3075 jschar *source;
3076 const jschar *startcp = x->cp;
3077 jschar ch;
3078 RECharSet *charSet;
3080 #ifdef REGEXP_DEBUG
3081 const char *opname = reop_names[op];
3082 re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
3083 gData->stateStackTop * 2, "", opname);
3084 #endif
3085 switch (op) {
3086 case REOP_EMPTY:
3087 result = x;
3088 break;
3089 case REOP_BOL:
3090 if (x->cp != gData->cpbegin) {
3091 if (!gData->cx->regExpStatics.multiline &&
3092 !(gData->regexp->flags & JSREG_MULTILINE)) {
3093 break;
3095 if (!RE_IS_LINE_TERM(x->cp[-1]))
3096 break;
3098 result = x;
3099 break;
3100 case REOP_EOL:
3101 if (x->cp != gData->cpend) {
3102 if (!gData->cx->regExpStatics.multiline &&
3103 !(gData->regexp->flags & JSREG_MULTILINE)) {
3104 break;
3106 if (!RE_IS_LINE_TERM(*x->cp))
3107 break;
3109 result = x;
3110 break;
3111 case REOP_WBDRY:
3112 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
3113 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
3114 result = x;
3116 break;
3117 case REOP_WNONBDRY:
3118 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
3119 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
3120 result = x;
3122 break;
3123 case REOP_DOT:
3124 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
3125 result = x;
3126 result->cp++;
3128 break;
3129 case REOP_DIGIT:
3130 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
3131 result = x;
3132 result->cp++;
3134 break;
3135 case REOP_NONDIGIT:
3136 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
3137 result = x;
3138 result->cp++;
3140 break;
3141 case REOP_ALNUM:
3142 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
3143 result = x;
3144 result->cp++;
3146 break;
3147 case REOP_NONALNUM:
3148 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
3149 result = x;
3150 result->cp++;
3152 break;
3153 case REOP_SPACE:
3154 if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
3155 result = x;
3156 result->cp++;
3158 break;
3159 case REOP_NONSPACE:
3160 if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
3161 result = x;
3162 result->cp++;
3164 break;
3165 case REOP_BACKREF:
3166 pc = ReadCompactIndex(pc, &parenIndex);
3167 JS_ASSERT(parenIndex < gData->regexp->parenCount);
3168 result = BackrefMatcher(gData, x, parenIndex);
3169 break;
3170 case REOP_FLAT:
3171 pc = ReadCompactIndex(pc, &offset);
3172 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
3173 pc = ReadCompactIndex(pc, &length);
3174 JS_ASSERT(1 <= length);
3175 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
3176 if (length <= (size_t)(gData->cpend - x->cp)) {
3177 source = JSSTRING_CHARS(gData->regexp->source) + offset;
3178 re_debug_chars(source, length);
3179 for (index = 0; index != length; index++) {
3180 if (source[index] != x->cp[index])
3181 return NULL;
3183 x->cp += length;
3184 result = x;
3186 break;
3187 case REOP_FLAT1:
3188 matchCh = *pc++;
3189 re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
3190 if (x->cp != gData->cpend && *x->cp == matchCh) {
3191 result = x;
3192 result->cp++;
3194 break;
3195 case REOP_FLATi:
3196 pc = ReadCompactIndex(pc, &offset);
3197 JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
3198 pc = ReadCompactIndex(pc, &length);
3199 JS_ASSERT(1 <= length);
3200 JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
3201 source = JSSTRING_CHARS(gData->regexp->source);
3202 result = FlatNIMatcher(gData, x, source + offset, length);
3203 break;
3204 case REOP_FLAT1i:
3205 matchCh = *pc++;
3206 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
3207 result = x;
3208 result->cp++;
3210 break;
3211 case REOP_UCFLAT1:
3212 matchCh = GET_ARG(pc);
3213 re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp);
3214 pc += ARG_LEN;
3215 if (x->cp != gData->cpend && *x->cp == matchCh) {
3216 result = x;
3217 result->cp++;
3219 break;
3220 case REOP_UCFLAT1i:
3221 matchCh = GET_ARG(pc);
3222 pc += ARG_LEN;
3223 if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
3224 result = x;
3225 result->cp++;
3227 break;
3228 case REOP_CLASS:
3229 pc = ReadCompactIndex(pc, &index);
3230 JS_ASSERT(index < gData->regexp->classCount);
3231 if (x->cp != gData->cpend) {
3232 charSet = &gData->regexp->classList[index];
3233 JS_ASSERT(charSet->converted);
3234 ch = *x->cp;
3235 index = ch >> 3;
3236 if (charSet->length != 0 &&
3237 ch <= charSet->length &&
3238 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
3239 result = x;
3240 result->cp++;
3243 break;
3244 case REOP_NCLASS:
3245 pc = ReadCompactIndex(pc, &index);
3246 JS_ASSERT(index < gData->regexp->classCount);
3247 if (x->cp != gData->cpend) {
3248 charSet = &gData->regexp->classList[index];
3249 JS_ASSERT(charSet->converted);
3250 ch = *x->cp;
3251 index = ch >> 3;
3252 if (charSet->length == 0 ||
3253 ch > charSet->length ||
3254 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
3255 result = x;
3256 result->cp++;
3259 break;
3261 default:
3262 JS_ASSERT(JS_FALSE);
3264 if (result) {
3265 if (!updatecp)
3266 x->cp = startcp;
3267 *startpc = pc;
3268 re_debug(" * ");
3269 return result;
3271 x->cp = startcp;
3272 return NULL;
3275 static JS_ALWAYS_INLINE REMatchState *
3276 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
3278 REMatchState *result = NULL;
3279 REBackTrackData *backTrackData;
3280 jsbytecode *nextpc, *testpc;
3281 REOp nextop;
3282 RECapture *cap;
3283 REProgState *curState;
3284 const jschar *startcp;
3285 size_t parenIndex, k;
3286 size_t parenSoFar = 0;
3288 jschar matchCh1, matchCh2;
3289 RECharSet *charSet;
3291 JSBool anchor;
3292 jsbytecode *pc = gData->regexp->program;
3293 REOp op = (REOp) *pc++;
3296 * If the first node is a simple match, step the index into the string
3297 * until that match is made, or fail if it can't be found at all.
3299 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
3300 anchor = JS_FALSE;
3301 while (x->cp <= gData->cpend) {
3302 nextpc = pc; /* reset back to start each time */
3303 result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
3304 if (result) {
3305 anchor = JS_TRUE;
3306 x = result;
3307 pc = nextpc; /* accept skip to next opcode */
3308 op = (REOp) *pc++;
3309 JS_ASSERT(op < REOP_LIMIT);
3310 break;
3312 gData->skipped++;
3313 x->cp++;
3315 if (!anchor)
3316 goto bad;
3319 for (;;) {
3320 #ifdef REGEXP_DEBUG
3321 const char *opname = reop_names[op];
3322 re_debug("\n%06d: %*s%s", pc - gData->regexp->program,
3323 gData->stateStackTop * 2, "", opname);
3324 #endif
3325 if (REOP_IS_SIMPLE(op)) {
3326 result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
3327 } else {
3328 curState = &gData->stateStack[gData->stateStackTop];
3329 switch (op) {
3330 case REOP_END:
3331 goto good;
3332 case REOP_ALTPREREQ2:
3333 nextpc = pc + GET_OFFSET(pc); /* start of next op */
3334 pc += ARG_LEN;
3335 matchCh2 = GET_ARG(pc);
3336 pc += ARG_LEN;
3337 k = GET_ARG(pc);
3338 pc += ARG_LEN;
3340 if (x->cp != gData->cpend) {
3341 if (*x->cp == matchCh2)
3342 goto doAlt;
3344 charSet = &gData->regexp->classList[k];
3345 if (!charSet->converted && !MatcherProcessCharSet(gData, charSet))
3346 goto bad;
3347 matchCh1 = *x->cp;
3348 k = matchCh1 >> 3;
3349 if ((charSet->length == 0 ||
3350 matchCh1 > charSet->length ||
3351 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
3352 charSet->sense) {
3353 goto doAlt;
3356 result = NULL;
3357 break;
3359 case REOP_ALTPREREQ:
3360 nextpc = pc + GET_OFFSET(pc); /* start of next op */
3361 pc += ARG_LEN;
3362 matchCh1 = GET_ARG(pc);
3363 pc += ARG_LEN;
3364 matchCh2 = GET_ARG(pc);
3365 pc += ARG_LEN;
3366 if (x->cp == gData->cpend ||
3367 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
3368 result = NULL;
3369 break;
3371 /* else false thru... */
3373 case REOP_ALT:
3374 doAlt:
3375 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
3376 pc += ARG_LEN; /* start of this alternate */
3377 curState->parenSoFar = parenSoFar;
3378 PUSH_STATE_STACK(gData);
3379 op = (REOp) *pc++;
3380 startcp = x->cp;
3381 if (REOP_IS_SIMPLE(op)) {
3382 if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
3383 op = (REOp) *nextpc++;
3384 pc = nextpc;
3385 continue;
3387 result = x;
3388 op = (REOp) *pc++;
3390 nextop = (REOp) *nextpc++;
3391 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
3392 goto bad;
3393 continue;
3396 * Occurs at (successful) end of REOP_ALT,
3398 case REOP_JUMP:
3400 * If we have not gotten a result here, it is because of an
3401 * empty match. Do the same thing REOP_EMPTY would do.
3403 if (!result)
3404 result = x;
3406 --gData->stateStackTop;
3407 pc += GET_OFFSET(pc);
3408 op = (REOp) *pc++;
3409 continue;
3412 * Occurs at last (successful) end of REOP_ALT,
3414 case REOP_ENDALT:
3416 * If we have not gotten a result here, it is because of an
3417 * empty match. Do the same thing REOP_EMPTY would do.
3419 if (!result)
3420 result = x;
3422 --gData->stateStackTop;
3423 op = (REOp) *pc++;
3424 continue;
3426 case REOP_LPAREN:
3427 pc = ReadCompactIndex(pc, &parenIndex);
3428 re_debug("[ %lu ]", (unsigned long) parenIndex);
3429 JS_ASSERT(parenIndex < gData->regexp->parenCount);
3430 if (parenIndex + 1 > parenSoFar)
3431 parenSoFar = parenIndex + 1;
3432 x->parens[parenIndex].index = x->cp - gData->cpbegin;
3433 x->parens[parenIndex].length = 0;
3434 op = (REOp) *pc++;
3435 continue;
3437 case REOP_RPAREN:
3439 ptrdiff_t delta;
3441 pc = ReadCompactIndex(pc, &parenIndex);
3442 JS_ASSERT(parenIndex < gData->regexp->parenCount);
3443 cap = &x->parens[parenIndex];
3444 delta = x->cp - (gData->cpbegin + cap->index);
3445 cap->length = (delta < 0) ? 0 : (size_t) delta;
3446 op = (REOp) *pc++;
3448 if (!result)
3449 result = x;
3450 continue;
3452 case REOP_ASSERT:
3453 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
3454 pc += ARG_LEN; /* start of ASSERT child */
3455 op = (REOp) *pc++;
3456 testpc = pc;
3457 if (REOP_IS_SIMPLE(op) &&
3458 !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
3459 result = NULL;
3460 break;
3462 curState->u.assertion.top =
3463 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
3464 curState->u.assertion.sz = gData->cursz;
3465 curState->index = x->cp - gData->cpbegin;
3466 curState->parenSoFar = parenSoFar;
3467 PUSH_STATE_STACK(gData);
3468 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
3469 nextpc, x, x->cp, 0, 0)) {
3470 goto bad;
3472 continue;
3474 case REOP_ASSERT_NOT:
3475 nextpc = pc + GET_OFFSET(pc);
3476 pc += ARG_LEN;
3477 op = (REOp) *pc++;
3478 testpc = pc;
3479 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
3480 SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
3481 *testpc == REOP_ASSERTNOTTEST) {
3482 result = NULL;
3483 break;
3485 curState->u.assertion.top
3486 = (char *)gData->backTrackSP -
3487 (char *)gData->backTrackStack;
3488 curState->u.assertion.sz = gData->cursz;
3489 curState->index = x->cp - gData->cpbegin;
3490 curState->parenSoFar = parenSoFar;
3491 PUSH_STATE_STACK(gData);
3492 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
3493 nextpc, x, x->cp, 0, 0)) {
3494 goto bad;
3496 continue;
3498 case REOP_ASSERTTEST:
3499 --gData->stateStackTop;
3500 --curState;
3501 x->cp = gData->cpbegin + curState->index;
3502 gData->backTrackSP =
3503 (REBackTrackData *) ((char *)gData->backTrackStack +
3504 curState->u.assertion.top);
3505 gData->cursz = curState->u.assertion.sz;
3506 if (result)
3507 result = x;
3508 break;
3510 case REOP_ASSERTNOTTEST:
3511 --gData->stateStackTop;
3512 --curState;
3513 x->cp = gData->cpbegin + curState->index;
3514 gData->backTrackSP =
3515 (REBackTrackData *) ((char *)gData->backTrackStack +
3516 curState->u.assertion.top);
3517 gData->cursz = curState->u.assertion.sz;
3518 result = (!result) ? x : NULL;
3519 break;
3520 case REOP_STAR:
3521 curState->u.quantifier.min = 0;
3522 curState->u.quantifier.max = (uintN)-1;
3523 goto quantcommon;
3524 case REOP_PLUS:
3525 curState->u.quantifier.min = 1;
3526 curState->u.quantifier.max = (uintN)-1;
3527 goto quantcommon;
3528 case REOP_OPT:
3529 curState->u.quantifier.min = 0;
3530 curState->u.quantifier.max = 1;
3531 goto quantcommon;
3532 case REOP_QUANT:
3533 pc = ReadCompactIndex(pc, &k);
3534 curState->u.quantifier.min = k;
3535 pc = ReadCompactIndex(pc, &k);
3536 /* max is k - 1 to use one byte for (uintN)-1 sentinel. */
3537 curState->u.quantifier.max = k - 1;
3538 JS_ASSERT(curState->u.quantifier.min
3539 <= curState->u.quantifier.max);
3540 quantcommon:
3541 if (curState->u.quantifier.max == 0) {
3542 pc = pc + GET_OFFSET(pc);
3543 op = (REOp) *pc++;
3544 result = x;
3545 continue;
3547 /* Step over <next> */
3548 nextpc = pc + ARG_LEN;
3549 op = (REOp) *nextpc++;
3550 startcp = x->cp;
3551 if (REOP_IS_SIMPLE(op)) {
3552 if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
3553 if (curState->u.quantifier.min == 0)
3554 result = x;
3555 else
3556 result = NULL;
3557 pc = pc + GET_OFFSET(pc);
3558 break;
3560 op = (REOp) *nextpc++;
3561 result = x;
3563 curState->index = startcp - gData->cpbegin;
3564 curState->continue_op = REOP_REPEAT;
3565 curState->continue_pc = pc;
3566 curState->parenSoFar = parenSoFar;
3567 PUSH_STATE_STACK(gData);
3568 if (curState->u.quantifier.min == 0 &&
3569 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
3570 0, 0)) {
3571 goto bad;
3573 pc = nextpc;
3574 continue;
3576 case REOP_ENDCHILD: /* marks the end of a quantifier child */
3577 pc = curState[-1].continue_pc;
3578 op = (REOp) curState[-1].continue_op;
3580 if (!result)
3581 result = x;
3582 continue;
3584 case REOP_REPEAT:
3585 --curState;
3586 do {
3587 --gData->stateStackTop;
3588 if (!result) {
3589 /* Failed, see if we have enough children. */
3590 if (curState->u.quantifier.min == 0)
3591 goto repeatDone;
3592 goto break_switch;
3594 if (curState->u.quantifier.min == 0 &&
3595 x->cp == gData->cpbegin + curState->index) {
3596 /* matched an empty string, that'll get us nowhere */
3597 result = NULL;
3598 goto break_switch;
3600 if (curState->u.quantifier.min != 0)
3601 curState->u.quantifier.min--;
3602 if (curState->u.quantifier.max != (uintN) -1)
3603 curState->u.quantifier.max--;
3604 if (curState->u.quantifier.max == 0)
3605 goto repeatDone;
3606 nextpc = pc + ARG_LEN;
3607 nextop = (REOp) *nextpc;
3608 startcp = x->cp;
3609 if (REOP_IS_SIMPLE(nextop)) {
3610 nextpc++;
3611 if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
3612 if (curState->u.quantifier.min == 0)
3613 goto repeatDone;
3614 result = NULL;
3615 goto break_switch;
3617 result = x;
3619 curState->index = startcp - gData->cpbegin;
3620 PUSH_STATE_STACK(gData);
3621 if (curState->u.quantifier.min == 0 &&
3622 !PushBackTrackState(gData, REOP_REPEAT,
3623 pc, x, startcp,
3624 curState->parenSoFar,
3625 parenSoFar -
3626 curState->parenSoFar)) {
3627 goto bad;
3629 } while (*nextpc == REOP_ENDCHILD);
3630 pc = nextpc;
3631 op = (REOp) *pc++;
3632 parenSoFar = curState->parenSoFar;
3633 continue;
3635 repeatDone:
3636 result = x;
3637 pc += GET_OFFSET(pc);
3638 goto break_switch;
3640 case REOP_MINIMALSTAR:
3641 curState->u.quantifier.min = 0;
3642 curState->u.quantifier.max = (uintN)-1;
3643 goto minimalquantcommon;
3644 case REOP_MINIMALPLUS:
3645 curState->u.quantifier.min = 1;
3646 curState->u.quantifier.max = (uintN)-1;
3647 goto minimalquantcommon;
3648 case REOP_MINIMALOPT:
3649 curState->u.quantifier.min = 0;
3650 curState->u.quantifier.max = 1;
3651 goto minimalquantcommon;
3652 case REOP_MINIMALQUANT:
3653 pc = ReadCompactIndex(pc, &k);
3654 curState->u.quantifier.min = k;
3655 pc = ReadCompactIndex(pc, &k);
3656 /* See REOP_QUANT comments about k - 1. */
3657 curState->u.quantifier.max = k - 1;
3658 JS_ASSERT(curState->u.quantifier.min
3659 <= curState->u.quantifier.max);
3660 minimalquantcommon:
3661 curState->index = x->cp - gData->cpbegin;
3662 curState->parenSoFar = parenSoFar;
3663 PUSH_STATE_STACK(gData);
3664 if (curState->u.quantifier.min != 0) {
3665 curState->continue_op = REOP_MINIMALREPEAT;
3666 curState->continue_pc = pc;
3667 /* step over <next> */
3668 pc += OFFSET_LEN;
3669 op = (REOp) *pc++;
3670 } else {
3671 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3672 pc, x, x->cp, 0, 0)) {
3673 goto bad;
3675 --gData->stateStackTop;
3676 pc = pc + GET_OFFSET(pc);
3677 op = (REOp) *pc++;
3679 continue;
3681 case REOP_MINIMALREPEAT:
3682 --gData->stateStackTop;
3683 --curState;
3685 re_debug("{%d,%d}", curState->u.quantifier.min,
3686 curState->u.quantifier.max);
3687 #define PREPARE_REPEAT() \
3688 JS_BEGIN_MACRO \
3689 curState->index = x->cp - gData->cpbegin; \
3690 curState->continue_op = REOP_MINIMALREPEAT; \
3691 curState->continue_pc = pc; \
3692 pc += ARG_LEN; \
3693 for (k = curState->parenSoFar; k < parenSoFar; k++) \
3694 x->parens[k].index = -1; \
3695 PUSH_STATE_STACK(gData); \
3696 op = (REOp) *pc++; \
3697 JS_ASSERT(op < REOP_LIMIT); \
3698 JS_END_MACRO
3700 if (!result) {
3701 re_debug(" - ");
3703 * Non-greedy failure - try to consume another child.
3705 if (curState->u.quantifier.max == (uintN) -1 ||
3706 curState->u.quantifier.max > 0) {
3707 PREPARE_REPEAT();
3708 continue;
3710 /* Don't need to adjust pc since we're going to pop. */
3711 break;
3713 if (curState->u.quantifier.min == 0 &&
3714 x->cp == gData->cpbegin + curState->index) {
3715 /* Matched an empty string, that'll get us nowhere. */
3716 result = NULL;
3717 break;
3719 if (curState->u.quantifier.min != 0)
3720 curState->u.quantifier.min--;
3721 if (curState->u.quantifier.max != (uintN) -1)
3722 curState->u.quantifier.max--;
3723 if (curState->u.quantifier.min != 0) {
3724 PREPARE_REPEAT();
3725 continue;
3727 curState->index = x->cp - gData->cpbegin;
3728 curState->parenSoFar = parenSoFar;
3729 PUSH_STATE_STACK(gData);
3730 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3731 pc, x, x->cp,
3732 curState->parenSoFar,
3733 parenSoFar - curState->parenSoFar)) {
3734 goto bad;
3736 --gData->stateStackTop;
3737 pc = pc + GET_OFFSET(pc);
3738 op = (REOp) *pc++;
3739 JS_ASSERT(op < REOP_LIMIT);
3740 continue;
3741 default:
3742 JS_ASSERT(JS_FALSE);
3743 result = NULL;
3745 break_switch:;
3749 * If the match failed and there's a backtrack option, take it.
3750 * Otherwise this is a complete and utter failure.
3752 if (!result) {
3753 if (gData->cursz == 0)
3754 return NULL;
3755 if (!JS_CHECK_OPERATION_LIMIT(gData->cx, JSOW_JUMP)) {
3756 gData->ok = JS_FALSE;
3757 return NULL;
3760 /* Potentially detect explosive regex here. */
3761 gData->backTrackCount++;
3762 if (gData->backTrackLimit &&
3763 gData->backTrackCount >= gData->backTrackLimit) {
3764 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3765 JSMSG_REGEXP_TOO_COMPLEX);
3766 gData->ok = JS_FALSE;
3767 return NULL;
3770 backTrackData = gData->backTrackSP;
3771 gData->cursz = backTrackData->sz;
3772 gData->backTrackSP =
3773 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3774 x->cp = backTrackData->cp;
3775 pc = backTrackData->backtrack_pc;
3776 op = (REOp) backTrackData->backtrack_op;
3777 JS_ASSERT(op < REOP_LIMIT);
3778 gData->stateStackTop = backTrackData->saveStateStackTop;
3779 JS_ASSERT(gData->stateStackTop);
3781 memcpy(gData->stateStack, backTrackData + 1,
3782 sizeof(REProgState) * backTrackData->saveStateStackTop);
3783 curState = &gData->stateStack[gData->stateStackTop - 1];
3785 if (backTrackData->parenCount) {
3786 memcpy(&x->parens[backTrackData->parenIndex],
3787 (char *)(backTrackData + 1) +
3788 sizeof(REProgState) * backTrackData->saveStateStackTop,
3789 sizeof(RECapture) * backTrackData->parenCount);
3790 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3791 } else {
3792 for (k = curState->parenSoFar; k < parenSoFar; k++)
3793 x->parens[k].index = -1;
3794 parenSoFar = curState->parenSoFar;
3797 re_debug("\tBT_Pop: %ld,%ld",
3798 (unsigned long) backTrackData->parenIndex,
3799 (unsigned long) backTrackData->parenCount);
3800 continue;
3802 x = result;
3805 * Continue with the expression.
3807 op = (REOp)*pc++;
3808 JS_ASSERT(op < REOP_LIMIT);
3811 bad:
3812 re_debug("\n");
3813 return NULL;
3815 good:
3816 re_debug("\n");
3817 return x;
3820 static REMatchState *
3821 MatchRegExp(REGlobalData *gData, REMatchState *x)
3823 REMatchState *result;
3824 const jschar *cp = x->cp;
3825 const jschar *cp2;
3826 uintN j;
3827 #ifdef JS_TRACER
3828 NativeRegExp native;
3830 /* Run with native regexp if possible. */
3831 if (TRACING_ENABLED(gData->cx) &&
3832 (native = GetNativeRegExp(gData->cx, gData->regexp))) {
3833 gData->skipped = (ptrdiff_t) x->cp;
3835 #ifdef JS_JIT_SPEW
3836 debug_only_v({
3837 VOUCH_DOES_NOT_REQUIRE_STACK();
3838 JSStackFrame *caller = (JS_ON_TRACE(gData->cx))
3839 ? NULL
3840 : js_GetScriptedCaller(gData->cx, NULL);
3841 printf("entering REGEXP trace at %s:%u@%u, code: %p\n",
3842 caller ? caller->script->filename : "<unknown>",
3843 caller ? js_FramePCToLineNumber(gData->cx, caller) : 0,
3844 caller ? FramePCOffset(caller) : 0,
3845 (void *) native);
3847 #endif
3849 #if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32)
3850 SIMULATE_FASTCALL(result, x, gData, native);
3851 #else
3852 result = native(x, gData);
3853 #endif
3855 debug_only_v(printf("leaving REGEXP trace\n"));
3857 gData->skipped = ((const jschar *) gData->skipped) - cp;
3858 return result;
3860 #endif
3862 * Have to include the position beyond the last character
3863 * in order to detect end-of-input/line condition.
3865 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3866 gData->skipped = cp2 - cp;
3867 x->cp = cp2;
3868 for (j = 0; j < gData->regexp->parenCount; j++)
3869 x->parens[j].index = -1;
3870 result = ExecuteREBytecode(gData, x);
3871 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3872 return result;
3873 gData->backTrackSP = gData->backTrackStack;
3874 gData->cursz = 0;
3875 gData->stateStackTop = 0;
3876 cp2 = cp + gData->skipped;
3878 return NULL;
3881 #define MIN_BACKTRACK_LIMIT 400000
3883 static REMatchState *
3884 InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3886 REMatchState *result;
3887 uintN i;
3889 gData->backTrackStackSize = INITIAL_BACKTRACK;
3890 JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
3891 &cx->regexpPool,
3892 INITIAL_BACKTRACK);
3893 if (!gData->backTrackStack)
3894 goto bad;
3896 gData->backTrackSP = gData->backTrackStack;
3897 gData->cursz = 0;
3898 gData->backTrackCount = 0;
3899 gData->backTrackLimit = 0;
3900 if (JS_GetOptions(cx) & JSOPTION_RELIMIT) {
3901 gData->backTrackLimit = length * length * length; /* O(n^3) */
3902 if (gData->backTrackLimit < MIN_BACKTRACK_LIMIT)
3903 gData->backTrackLimit = MIN_BACKTRACK_LIMIT;
3906 gData->stateStackLimit = INITIAL_STATESTACK;
3907 JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
3908 &cx->regexpPool,
3909 sizeof(REProgState) * INITIAL_STATESTACK);
3910 if (!gData->stateStack)
3911 goto bad;
3913 gData->stateStackTop = 0;
3914 gData->cx = cx;
3915 gData->regexp = re;
3916 gData->ok = JS_TRUE;
3918 JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
3919 &cx->regexpPool,
3920 offsetof(REMatchState, parens)
3921 + re->parenCount * sizeof(RECapture));
3922 if (!result)
3923 goto bad;
3925 for (i = 0; i < re->classCount; i++) {
3926 if (!re->classList[i].converted &&
3927 !MatcherProcessCharSet(gData, &re->classList[i])) {
3928 return NULL;
3932 return result;
3934 bad:
3935 js_ReportOutOfScriptQuota(cx);
3936 gData->ok = JS_FALSE;
3937 return NULL;
3940 JSBool
3941 js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
3942 JSBool test, jsval *rval)
3944 REGlobalData gData;
3945 REMatchState *x, *result;
3947 const jschar *cp, *ep;
3948 size_t i, length, start;
3949 JSSubString *morepar;
3950 JSBool ok;
3951 JSRegExpStatics *res;
3952 ptrdiff_t matchlen;
3953 uintN num, morenum;
3954 JSString *parstr, *matchstr;
3955 JSObject *obj;
3957 RECapture *parsub = NULL;
3958 void *mark;
3959 int64 *timestamp;
3962 * It's safe to load from cp because JSStrings have a zero at the end,
3963 * and we never let cp get beyond cpend.
3965 start = *indexp;
3966 JSSTRING_CHARS_AND_LENGTH(str, cp, length);
3967 if (start > length)
3968 start = length;
3969 gData.cpbegin = cp;
3970 gData.cpend = cp + length;
3971 cp += start;
3972 gData.start = start;
3973 gData.skipped = 0;
3975 if (!cx->regexpPool.first.next) {
3977 * The first arena in the regexpPool must have a timestamp at its base.
3979 JS_ARENA_ALLOCATE_CAST(timestamp, int64 *,
3980 &cx->regexpPool, sizeof *timestamp);
3981 if (!timestamp)
3982 return JS_FALSE;
3983 *timestamp = JS_Now();
3985 mark = JS_ARENA_MARK(&cx->regexpPool);
3987 x = InitMatch(cx, &gData, re, length);
3989 if (!x) {
3990 ok = JS_FALSE;
3991 goto out;
3993 x->cp = cp;
3996 * Call the recursive matcher to do the real work. Return null on mismatch
3997 * whether testing or not. On match, return an extended Array object.
3999 result = MatchRegExp(&gData, x);
4000 ok = gData.ok;
4001 if (!ok)
4002 goto out;
4003 if (!result) {
4004 *rval = JSVAL_NULL;
4005 goto out;
4007 cp = result->cp;
4008 i = cp - gData.cpbegin;
4009 *indexp = i;
4010 matchlen = i - (start + gData.skipped);
4011 JS_ASSERT(matchlen >= 0);
4012 ep = cp;
4013 cp -= matchlen;
4015 if (test) {
4017 * Testing for a match and updating cx->regExpStatics: don't allocate
4018 * an array object, do return true.
4020 *rval = JSVAL_TRUE;
4022 /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
4023 obj = NULL;
4024 } else {
4026 * The array returned on match has element 0 bound to the matched
4027 * string, elements 1 through state.parenCount bound to the paren
4028 * matches, an index property telling the length of the left context,
4029 * and an input property referring to the input string.
4031 obj = js_NewSlowArrayObject(cx);
4032 if (!obj) {
4033 ok = JS_FALSE;
4034 goto out;
4036 *rval = OBJECT_TO_JSVAL(obj);
4038 #define DEFVAL(val, id) { \
4039 ok = js_DefineProperty(cx, obj, id, val, \
4040 JS_PropertyStub, JS_PropertyStub, \
4041 JSPROP_ENUMERATE, NULL); \
4042 if (!ok) { \
4043 cx->weakRoots.newborn[GCX_OBJECT] = NULL; \
4044 cx->weakRoots.newborn[GCX_STRING] = NULL; \
4045 goto out; \
4049 matchstr = js_NewDependentString(cx, str, cp - JSSTRING_CHARS(str),
4050 matchlen);
4051 if (!matchstr) {
4052 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4053 ok = JS_FALSE;
4054 goto out;
4056 DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
4059 res = &cx->regExpStatics;
4060 res->input = str;
4061 res->parenCount = re->parenCount;
4062 if (re->parenCount == 0) {
4063 res->lastParen = js_EmptySubString;
4064 } else {
4065 for (num = 0; num < re->parenCount; num++) {
4066 parsub = &result->parens[num];
4067 if (num < 9) {
4068 if (parsub->index == -1) {
4069 res->parens[num].chars = NULL;
4070 res->parens[num].length = 0;
4071 } else {
4072 res->parens[num].chars = gData.cpbegin + parsub->index;
4073 res->parens[num].length = parsub->length;
4075 } else {
4076 morenum = num - 9;
4077 morepar = res->moreParens;
4078 if (!morepar) {
4079 res->moreLength = 10;
4080 morepar = (JSSubString*)
4081 JS_malloc(cx, 10 * sizeof(JSSubString));
4082 } else if (morenum >= res->moreLength) {
4083 res->moreLength += 10;
4084 morepar = (JSSubString*)
4085 JS_realloc(cx, morepar,
4086 res->moreLength * sizeof(JSSubString));
4088 if (!morepar) {
4089 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4090 cx->weakRoots.newborn[GCX_STRING] = NULL;
4091 ok = JS_FALSE;
4092 goto out;
4094 res->moreParens = morepar;
4095 if (parsub->index == -1) {
4096 morepar[morenum].chars = NULL;
4097 morepar[morenum].length = 0;
4098 } else {
4099 morepar[morenum].chars = gData.cpbegin + parsub->index;
4100 morepar[morenum].length = parsub->length;
4103 if (test)
4104 continue;
4105 if (parsub->index == -1) {
4106 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
4107 JSVAL_VOID, NULL, NULL,
4108 JSPROP_ENUMERATE, NULL);
4109 } else {
4110 parstr = js_NewDependentString(cx, str,
4111 gData.cpbegin + parsub->index -
4112 JSSTRING_CHARS(str),
4113 parsub->length);
4114 if (!parstr) {
4115 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4116 cx->weakRoots.newborn[GCX_STRING] = NULL;
4117 ok = JS_FALSE;
4118 goto out;
4120 ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
4121 STRING_TO_JSVAL(parstr), NULL, NULL,
4122 JSPROP_ENUMERATE, NULL);
4124 if (!ok) {
4125 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4126 cx->weakRoots.newborn[GCX_STRING] = NULL;
4127 goto out;
4130 if (parsub->index == -1) {
4131 res->lastParen = js_EmptySubString;
4132 } else {
4133 res->lastParen.chars = gData.cpbegin + parsub->index;
4134 res->lastParen.length = parsub->length;
4138 if (!test) {
4140 * Define the index and input properties last for better for/in loop
4141 * order (so they come after the elements).
4143 DEFVAL(INT_TO_JSVAL(start + gData.skipped),
4144 ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
4145 DEFVAL(STRING_TO_JSVAL(str),
4146 ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
4149 #undef DEFVAL
4151 res->lastMatch.chars = cp;
4152 res->lastMatch.length = matchlen;
4155 * For JS1.3 and ECMAv2, emulate Perl5 exactly:
4157 * js1.3 "hi", "hi there" "hihitherehi therebye"
4159 res->leftContext.chars = JSSTRING_CHARS(str);
4160 res->leftContext.length = start + gData.skipped;
4161 res->rightContext.chars = ep;
4162 res->rightContext.length = gData.cpend - ep;
4164 out:
4165 JS_ARENA_RELEASE(&cx->regexpPool, mark);
4166 return ok;
4169 /************************************************************************/
4171 #define REGEXP_PROP_ATTRS (JSPROP_PERMANENT | JSPROP_SHARED)
4172 #define RO_REGEXP_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_READONLY)
4174 static JSPropertySpec regexp_props[] = {
4175 {"source", REGEXP_SOURCE, RO_REGEXP_PROP_ATTRS,0,0},
4176 {"global", REGEXP_GLOBAL, RO_REGEXP_PROP_ATTRS,0,0},
4177 {"ignoreCase", REGEXP_IGNORE_CASE, RO_REGEXP_PROP_ATTRS,0,0},
4178 {"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0},
4179 {"multiline", REGEXP_MULTILINE, RO_REGEXP_PROP_ATTRS,0,0},
4180 {"sticky", REGEXP_STICKY, RO_REGEXP_PROP_ATTRS,0,0},
4181 {0,0,0,0,0}
4184 static JSBool
4185 regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
4187 jsint slot;
4188 JSRegExp *re;
4190 if (!JSVAL_IS_INT(id))
4191 return JS_TRUE;
4192 while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
4193 obj = OBJ_GET_PROTO(cx, obj);
4194 if (!obj)
4195 return JS_TRUE;
4197 slot = JSVAL_TO_INT(id);
4198 if (slot == REGEXP_LAST_INDEX)
4199 return JS_GetReservedSlot(cx, obj, 0, vp);
4201 JS_LOCK_OBJ(cx, obj);
4202 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4203 if (re) {
4204 switch (slot) {
4205 case REGEXP_SOURCE:
4206 *vp = STRING_TO_JSVAL(re->source);
4207 break;
4208 case REGEXP_GLOBAL:
4209 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
4210 break;
4211 case REGEXP_IGNORE_CASE:
4212 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
4213 break;
4214 case REGEXP_MULTILINE:
4215 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
4216 break;
4217 case REGEXP_STICKY:
4218 *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_STICKY) != 0);
4219 break;
4222 JS_UNLOCK_OBJ(cx, obj);
4223 return JS_TRUE;
4226 static JSBool
4227 regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
4229 JSBool ok;
4230 jsint slot;
4231 jsdouble lastIndex;
4233 ok = JS_TRUE;
4234 if (!JSVAL_IS_INT(id))
4235 return ok;
4236 while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) {
4237 obj = OBJ_GET_PROTO(cx, obj);
4238 if (!obj)
4239 return JS_TRUE;
4241 slot = JSVAL_TO_INT(id);
4242 if (slot == REGEXP_LAST_INDEX) {
4243 if (!JS_ValueToNumber(cx, *vp, &lastIndex))
4244 return JS_FALSE;
4245 lastIndex = js_DoubleToInteger(lastIndex);
4246 ok = JS_NewNumberValue(cx, lastIndex, vp) &&
4247 JS_SetReservedSlot(cx, obj, 0, *vp);
4249 return ok;
4253 * RegExp class static properties and their Perl counterparts:
4255 * RegExp.input $_
4256 * RegExp.multiline $*
4257 * RegExp.lastMatch $&
4258 * RegExp.lastParen $+
4259 * RegExp.leftContext $`
4260 * RegExp.rightContext $'
4262 enum regexp_static_tinyid {
4263 REGEXP_STATIC_INPUT = -1,
4264 REGEXP_STATIC_MULTILINE = -2,
4265 REGEXP_STATIC_LAST_MATCH = -3,
4266 REGEXP_STATIC_LAST_PAREN = -4,
4267 REGEXP_STATIC_LEFT_CONTEXT = -5,
4268 REGEXP_STATIC_RIGHT_CONTEXT = -6
4271 void
4272 js_InitRegExpStatics(JSContext *cx)
4275 * To avoid multiple allocations in InitMatch(), the arena size parameter
4276 * should be at least as big as:
4277 * INITIAL_BACKTRACK
4278 * + (sizeof(REProgState) * INITIAL_STATESTACK)
4279 * + (offsetof(REMatchState, parens) + avgParanSize * sizeof(RECapture))
4281 JS_INIT_ARENA_POOL(&cx->regexpPool, "regexp",
4282 12 * 1024 - 40, /* FIXME: bug 421435 */
4283 sizeof(void *), &cx->scriptStackQuota);
4285 JS_ClearRegExpStatics(cx);
4288 void
4289 js_TraceRegExpStatics(JSTracer *trc, JSContext *acx)
4291 JSRegExpStatics *res = &acx->regExpStatics;
4293 if (res->input)
4294 JS_CALL_STRING_TRACER(trc, res->input, "res->input");
4297 void
4298 js_FreeRegExpStatics(JSContext *cx)
4300 JSRegExpStatics *res = &cx->regExpStatics;
4302 if (res->moreParens) {
4303 JS_free(cx, res->moreParens);
4304 res->moreParens = NULL;
4306 JS_FinishArenaPool(&cx->regexpPool);
4309 static JSBool
4310 regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
4312 jsint slot;
4313 JSRegExpStatics *res;
4314 JSString *str;
4315 JSSubString *sub;
4317 res = &cx->regExpStatics;
4318 if (!JSVAL_IS_INT(id))
4319 return JS_TRUE;
4320 slot = JSVAL_TO_INT(id);
4321 switch (slot) {
4322 case REGEXP_STATIC_INPUT:
4323 *vp = res->input ? STRING_TO_JSVAL(res->input)
4324 : JS_GetEmptyStringValue(cx);
4325 return JS_TRUE;
4326 case REGEXP_STATIC_MULTILINE:
4327 *vp = BOOLEAN_TO_JSVAL(res->multiline);
4328 return JS_TRUE;
4329 case REGEXP_STATIC_LAST_MATCH:
4330 sub = &res->lastMatch;
4331 break;
4332 case REGEXP_STATIC_LAST_PAREN:
4333 sub = &res->lastParen;
4334 break;
4335 case REGEXP_STATIC_LEFT_CONTEXT:
4336 sub = &res->leftContext;
4337 break;
4338 case REGEXP_STATIC_RIGHT_CONTEXT:
4339 sub = &res->rightContext;
4340 break;
4341 default:
4342 sub = REGEXP_PAREN_SUBSTRING(res, slot);
4343 break;
4345 str = js_NewStringCopyN(cx, sub->chars, sub->length);
4346 if (!str)
4347 return JS_FALSE;
4348 *vp = STRING_TO_JSVAL(str);
4349 return JS_TRUE;
4352 static JSBool
4353 regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
4355 JSRegExpStatics *res;
4357 if (!JSVAL_IS_INT(id))
4358 return JS_TRUE;
4359 res = &cx->regExpStatics;
4360 /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
4361 if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
4362 if (!JSVAL_IS_STRING(*vp) &&
4363 !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
4364 return JS_FALSE;
4366 res->input = JSVAL_TO_STRING(*vp);
4367 } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
4368 if (!JSVAL_IS_BOOLEAN(*vp) &&
4369 !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
4370 return JS_FALSE;
4372 res->multiline = JSVAL_TO_BOOLEAN(*vp);
4374 return JS_TRUE;
4376 #define REGEXP_STATIC_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE)
4377 #define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY)
4379 static JSPropertySpec regexp_static_props[] = {
4380 {"input",
4381 REGEXP_STATIC_INPUT,
4382 REGEXP_STATIC_PROP_ATTRS,
4383 regexp_static_getProperty, regexp_static_setProperty},
4384 {"multiline",
4385 REGEXP_STATIC_MULTILINE,
4386 REGEXP_STATIC_PROP_ATTRS,
4387 regexp_static_getProperty, regexp_static_setProperty},
4388 {"lastMatch",
4389 REGEXP_STATIC_LAST_MATCH,
4390 RO_REGEXP_STATIC_PROP_ATTRS,
4391 regexp_static_getProperty, regexp_static_getProperty},
4392 {"lastParen",
4393 REGEXP_STATIC_LAST_PAREN,
4394 RO_REGEXP_STATIC_PROP_ATTRS,
4395 regexp_static_getProperty, regexp_static_getProperty},
4396 {"leftContext",
4397 REGEXP_STATIC_LEFT_CONTEXT,
4398 RO_REGEXP_STATIC_PROP_ATTRS,
4399 regexp_static_getProperty, regexp_static_getProperty},
4400 {"rightContext",
4401 REGEXP_STATIC_RIGHT_CONTEXT,
4402 RO_REGEXP_STATIC_PROP_ATTRS,
4403 regexp_static_getProperty, regexp_static_getProperty},
4405 /* XXX should have block scope and local $1, etc. */
4406 {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS,
4407 regexp_static_getProperty, regexp_static_getProperty},
4408 {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS,
4409 regexp_static_getProperty, regexp_static_getProperty},
4410 {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS,
4411 regexp_static_getProperty, regexp_static_getProperty},
4412 {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS,
4413 regexp_static_getProperty, regexp_static_getProperty},
4414 {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS,
4415 regexp_static_getProperty, regexp_static_getProperty},
4416 {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS,
4417 regexp_static_getProperty, regexp_static_getProperty},
4418 {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS,
4419 regexp_static_getProperty, regexp_static_getProperty},
4420 {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS,
4421 regexp_static_getProperty, regexp_static_getProperty},
4422 {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS,
4423 regexp_static_getProperty, regexp_static_getProperty},
4425 {0,0,0,0,0}
4428 static void
4429 regexp_finalize(JSContext *cx, JSObject *obj)
4431 JSRegExp *re;
4433 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4434 if (!re)
4435 return;
4436 js_DestroyRegExp(cx, re);
4439 /* Forward static prototype. */
4440 static JSBool
4441 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4442 JSBool test, jsval *rval);
4444 static JSBool
4445 regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4447 return regexp_exec_sub(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv,
4448 JS_FALSE, rval);
4451 #if JS_HAS_XDR
4453 #include "jsxdrapi.h"
4455 static JSBool
4456 regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
4458 JSRegExp *re;
4459 JSString *source;
4460 uint32 flagsword;
4461 JSObject *obj;
4463 if (xdr->mode == JSXDR_ENCODE) {
4464 re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
4465 if (!re)
4466 return JS_FALSE;
4467 source = re->source;
4468 flagsword = (uint32)re->flags;
4470 if (!JS_XDRString(xdr, &source) ||
4471 !JS_XDRUint32(xdr, &flagsword)) {
4472 return JS_FALSE;
4474 if (xdr->mode == JSXDR_DECODE) {
4475 obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL, 0);
4476 if (!obj)
4477 return JS_FALSE;
4478 STOBJ_CLEAR_PARENT(obj);
4479 STOBJ_CLEAR_PROTO(obj);
4480 re = js_NewRegExp(xdr->cx, NULL, source, (uint8)flagsword, JS_FALSE);
4481 if (!re)
4482 return JS_FALSE;
4483 if (!JS_SetPrivate(xdr->cx, obj, re) ||
4484 !js_SetLastIndex(xdr->cx, obj, 0)) {
4485 js_DestroyRegExp(xdr->cx, re);
4486 return JS_FALSE;
4488 *objp = obj;
4490 return JS_TRUE;
4493 #else /* !JS_HAS_XDR */
4495 #define regexp_xdrObject NULL
4497 #endif /* !JS_HAS_XDR */
4499 static void
4500 regexp_trace(JSTracer *trc, JSObject *obj)
4502 JSRegExp *re;
4504 re = (JSRegExp *) JS_GetPrivate(trc->context, obj);
4505 if (re && re->source)
4506 JS_CALL_STRING_TRACER(trc, re->source, "source");
4509 JSClass js_RegExpClass = {
4510 js_RegExp_str,
4511 JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) |
4512 JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
4513 JS_PropertyStub, JS_PropertyStub,
4514 regexp_getProperty, regexp_setProperty,
4515 JS_EnumerateStub, JS_ResolveStub,
4516 JS_ConvertStub, regexp_finalize,
4517 NULL, NULL,
4518 regexp_call, NULL,
4519 regexp_xdrObject, NULL,
4520 JS_CLASS_TRACE(regexp_trace), 0
4523 static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
4525 JSBool
4526 js_regexp_toString(JSContext *cx, JSObject *obj, jsval *vp)
4528 JSRegExp *re;
4529 const jschar *source;
4530 jschar *chars;
4531 size_t length, nflags;
4532 uintN flags;
4533 JSString *str;
4535 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, vp + 2))
4536 return JS_FALSE;
4537 JS_LOCK_OBJ(cx, obj);
4538 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4539 if (!re) {
4540 JS_UNLOCK_OBJ(cx, obj);
4541 *vp = STRING_TO_JSVAL(cx->runtime->emptyString);
4542 return JS_TRUE;
4545 JSSTRING_CHARS_AND_LENGTH(re->source, source, length);
4546 if (length == 0) {
4547 source = empty_regexp_ucstr;
4548 length = JS_ARRAY_LENGTH(empty_regexp_ucstr) - 1;
4550 length += 2;
4551 nflags = 0;
4552 for (flags = re->flags; flags != 0; flags &= flags - 1)
4553 nflags++;
4554 chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
4555 if (!chars) {
4556 JS_UNLOCK_OBJ(cx, obj);
4557 return JS_FALSE;
4560 chars[0] = '/';
4561 js_strncpy(&chars[1], source, length - 2);
4562 chars[length-1] = '/';
4563 if (nflags) {
4564 if (re->flags & JSREG_GLOB)
4565 chars[length++] = 'g';
4566 if (re->flags & JSREG_FOLD)
4567 chars[length++] = 'i';
4568 if (re->flags & JSREG_MULTILINE)
4569 chars[length++] = 'm';
4570 if (re->flags & JSREG_STICKY)
4571 chars[length++] = 'y';
4573 JS_UNLOCK_OBJ(cx, obj);
4574 chars[length] = 0;
4576 str = js_NewString(cx, chars, length);
4577 if (!str) {
4578 JS_free(cx, chars);
4579 return JS_FALSE;
4581 *vp = STRING_TO_JSVAL(str);
4582 return JS_TRUE;
4585 static JSBool
4586 regexp_toString(JSContext *cx, uintN argc, jsval *vp)
4588 JSObject *obj;
4590 obj = JS_THIS_OBJECT(cx, vp);
4591 return obj && js_regexp_toString(cx, obj, vp);
4594 static JSBool
4595 regexp_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4596 jsval *rval)
4598 JSString *opt, *str;
4599 JSRegExp *oldre, *re;
4600 JSBool ok, ok2;
4601 JSObject *obj2;
4602 size_t length, nbytes;
4603 const jschar *cp, *start, *end;
4604 jschar *nstart, *ncp, *tmp;
4606 if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
4607 return JS_FALSE;
4608 opt = NULL;
4609 if (argc == 0) {
4610 str = cx->runtime->emptyString;
4611 } else {
4612 if (JSVAL_IS_OBJECT(argv[0])) {
4614 * If we get passed in a RegExp object we construct a new
4615 * RegExp that is a duplicate of it by re-compiling the
4616 * original source code. ECMA requires that it be an error
4617 * here if the flags are specified. (We must use the flags
4618 * from the original RegExp also).
4620 obj2 = JSVAL_TO_OBJECT(argv[0]);
4621 if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
4622 if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
4623 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4624 JSMSG_NEWREGEXP_FLAGGED);
4625 return JS_FALSE;
4627 JS_LOCK_OBJ(cx, obj2);
4628 re = (JSRegExp *) JS_GetPrivate(cx, obj2);
4629 if (!re) {
4630 JS_UNLOCK_OBJ(cx, obj2);
4631 return JS_FALSE;
4633 re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
4634 JS_UNLOCK_OBJ(cx, obj2);
4635 goto created;
4638 str = js_ValueToString(cx, argv[0]);
4639 if (!str)
4640 return JS_FALSE;
4641 argv[0] = STRING_TO_JSVAL(str);
4642 if (argc > 1) {
4643 if (JSVAL_IS_VOID(argv[1])) {
4644 opt = NULL;
4645 } else {
4646 opt = js_ValueToString(cx, argv[1]);
4647 if (!opt)
4648 return JS_FALSE;
4649 argv[1] = STRING_TO_JSVAL(opt);
4653 /* Escape any naked slashes in the regexp source. */
4654 JSSTRING_CHARS_AND_LENGTH(str, start, length);
4655 end = start + length;
4656 nstart = ncp = NULL;
4657 for (cp = start; cp < end; cp++) {
4658 if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
4659 nbytes = (++length + 1) * sizeof(jschar);
4660 if (!nstart) {
4661 nstart = (jschar *) JS_malloc(cx, nbytes);
4662 if (!nstart)
4663 return JS_FALSE;
4664 ncp = nstart + (cp - start);
4665 js_strncpy(nstart, start, cp - start);
4666 } else {
4667 tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
4668 if (!tmp) {
4669 JS_free(cx, nstart);
4670 return JS_FALSE;
4672 ncp = tmp + (ncp - nstart);
4673 nstart = tmp;
4675 *ncp++ = '\\';
4677 if (nstart)
4678 *ncp++ = *cp;
4681 if (nstart) {
4682 /* Don't forget to store the backstop after the new string. */
4683 JS_ASSERT((size_t)(ncp - nstart) == length);
4684 *ncp = 0;
4685 str = js_NewString(cx, nstart, length);
4686 if (!str) {
4687 JS_free(cx, nstart);
4688 return JS_FALSE;
4690 argv[0] = STRING_TO_JSVAL(str);
4694 re = js_NewRegExpOpt(cx, str, opt, JS_FALSE);
4695 created:
4696 if (!re)
4697 return JS_FALSE;
4698 JS_LOCK_OBJ(cx, obj);
4699 oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
4700 ok = JS_SetPrivate(cx, obj, re);
4701 ok2 = js_SetLastIndex(cx, obj, 0);
4702 JS_UNLOCK_OBJ(cx, obj);
4703 if (!ok) {
4704 js_DestroyRegExp(cx, re);
4705 return JS_FALSE;
4707 if (oldre)
4708 js_DestroyRegExp(cx, oldre);
4709 *rval = OBJECT_TO_JSVAL(obj);
4710 return ok2;
4713 static JSBool
4714 regexp_compile(JSContext *cx, uintN argc, jsval *vp)
4716 JSObject *obj;
4718 obj = JS_THIS_OBJECT(cx, vp);
4719 return obj && regexp_compile_sub(cx, obj, argc, vp + 2, vp);
4722 static JSBool
4723 regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
4724 JSBool test, jsval *rval)
4726 JSBool ok, sticky;
4727 JSRegExp *re;
4728 jsdouble lastIndex;
4729 JSString *str;
4730 size_t i;
4732 ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
4733 if (!ok)
4734 return JS_FALSE;
4735 JS_LOCK_OBJ(cx, obj);
4736 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4737 if (!re) {
4738 JS_UNLOCK_OBJ(cx, obj);
4739 return JS_TRUE;
4742 /* NB: we must reach out: after this paragraph, in order to drop re. */
4743 HOLD_REGEXP(cx, re);
4744 sticky = (re->flags & JSREG_STICKY) != 0;
4745 if (re->flags & (JSREG_GLOB | JSREG_STICKY)) {
4746 ok = js_GetLastIndex(cx, obj, &lastIndex);
4747 } else {
4748 lastIndex = 0;
4750 JS_UNLOCK_OBJ(cx, obj);
4751 if (!ok)
4752 goto out;
4754 /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
4755 if (argc == 0) {
4756 str = cx->regExpStatics.input;
4757 if (!str) {
4758 const char *bytes = js_GetStringBytes(cx, re->source);
4760 if (bytes) {
4761 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4762 JSMSG_NO_INPUT,
4763 bytes,
4764 (re->flags & JSREG_GLOB) ? "g" : "",
4765 (re->flags & JSREG_FOLD) ? "i" : "",
4766 (re->flags & JSREG_MULTILINE) ? "m" : "",
4767 (re->flags & JSREG_STICKY) ? "y" : "");
4769 ok = JS_FALSE;
4770 goto out;
4772 } else {
4773 str = js_ValueToString(cx, argv[0]);
4774 if (!str) {
4775 ok = JS_FALSE;
4776 goto out;
4778 argv[0] = STRING_TO_JSVAL(str);
4781 if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
4782 ok = js_SetLastIndex(cx, obj, 0);
4783 *rval = JSVAL_NULL;
4784 } else {
4785 i = (size_t) lastIndex;
4786 ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
4787 if (ok &&
4788 ((re->flags & JSREG_GLOB) || (*rval != JSVAL_NULL && sticky))) {
4789 ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
4793 out:
4794 DROP_REGEXP(cx, re);
4795 return ok;
4798 static JSBool
4799 regexp_exec(JSContext *cx, uintN argc, jsval *vp)
4801 return regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_FALSE,
4802 vp);
4805 static JSBool
4806 regexp_test(JSContext *cx, uintN argc, jsval *vp)
4808 if (!regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_TRUE, vp))
4809 return JS_FALSE;
4810 if (*vp != JSVAL_TRUE)
4811 *vp = JSVAL_FALSE;
4812 return JS_TRUE;
4815 #ifdef JS_TRACER
4816 static jsint FASTCALL
4817 Regexp_p_test(JSContext* cx, JSObject* regexp, JSString* str)
4819 jsval vp[3] = { JSVAL_NULL, OBJECT_TO_JSVAL(regexp), STRING_TO_JSVAL(str) };
4820 if (!regexp_exec_sub(cx, regexp, 1, vp + 2, JS_TRUE, vp))
4821 return JSVAL_TO_BOOLEAN(JSVAL_VOID);
4822 return *vp == JSVAL_TRUE;
4825 JS_DEFINE_TRCINFO_1(regexp_test,
4826 (3, (static, BOOL_RETRY, Regexp_p_test, CONTEXT, THIS, STRING, 1, 1)))
4828 #endif
4830 static JSFunctionSpec regexp_methods[] = {
4831 #if JS_HAS_TOSOURCE
4832 JS_FN(js_toSource_str, regexp_toString, 0,0),
4833 #endif
4834 JS_FN(js_toString_str, regexp_toString, 0,0),
4835 JS_FN("compile", regexp_compile, 2,0),
4836 JS_FN("exec", regexp_exec, 1,0),
4837 JS_TN("test", regexp_test, 1,0, regexp_test_trcinfo),
4838 JS_FS_END
4841 static JSBool
4842 RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4844 if (!JS_IsConstructing(cx)) {
4846 * If first arg is regexp and no flags are given, just return the arg.
4847 * (regexp_compile_sub detects the regexp + flags case and throws a
4848 * TypeError.) See 10.15.3.1.
4850 if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
4851 !JSVAL_IS_PRIMITIVE(argv[0]) &&
4852 OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
4853 *rval = argv[0];
4854 return JS_TRUE;
4857 /* Otherwise, replace obj with a new RegExp object. */
4858 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4859 if (!obj)
4860 return JS_FALSE;
4863 * regexp_compile_sub does not use rval to root its temporaries so we
4864 * can use it to root obj.
4866 *rval = OBJECT_TO_JSVAL(obj);
4868 return regexp_compile_sub(cx, obj, argc, argv, rval);
4871 JSObject *
4872 js_InitRegExpClass(JSContext *cx, JSObject *obj)
4874 JSObject *proto, *ctor;
4875 jsval rval;
4877 proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
4878 regexp_props, regexp_methods,
4879 regexp_static_props, NULL);
4881 if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
4882 return NULL;
4883 if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
4884 !JS_AliasProperty(cx, ctor, "multiline", "$*") ||
4885 !JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
4886 !JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
4887 !JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
4888 !JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
4889 goto bad;
4892 /* Give RegExp.prototype private data so it matches the empty string. */
4893 if (!regexp_compile_sub(cx, proto, 0, NULL, &rval))
4894 goto bad;
4895 return proto;
4897 bad:
4898 JS_DeleteProperty(cx, obj, js_RegExpClass.name);
4899 return NULL;
4902 JSObject *
4903 js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
4904 jschar *chars, size_t length, uintN flags)
4906 JSString *str;
4907 JSObject *obj;
4908 JSRegExp *re;
4910 str = js_NewStringCopyN(cx, chars, length);
4911 if (!str)
4912 return NULL;
4913 JSAutoTempValueRooter tvr(cx, str);
4914 re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
4915 if (!re)
4916 return NULL;
4917 obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0);
4918 if (!obj || !JS_SetPrivate(cx, obj, re)) {
4919 js_DestroyRegExp(cx, re);
4920 obj = NULL;
4922 if (obj && !js_SetLastIndex(cx, obj, 0))
4923 obj = NULL;
4924 return obj;
4927 JSObject *
4928 js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
4930 JSObject *clone;
4931 JSRegExp *re;
4933 JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
4934 clone = js_NewObject(cx, &js_RegExpClass, NULL, parent, 0);
4935 if (!clone)
4936 return NULL;
4937 re = (JSRegExp *) JS_GetPrivate(cx, obj);
4938 if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
4939 cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4940 return NULL;
4942 HOLD_REGEXP(cx, re);
4943 return clone;
4946 JSBool
4947 js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
4949 jsval v;
4951 return JS_GetReservedSlot(cx, obj, 0, &v) &&
4952 JS_ValueToNumber(cx, v, lastIndex);
4955 JSBool
4956 js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
4958 jsval v;
4960 return JS_NewNumberValue(cx, lastIndex, &v) &&
4961 JS_SetReservedSlot(cx, obj, 0, v);